diff --git a/.github/codecov.yml b/.github/codecov.yml index 9b75efc791d..f185c5e2dcc 100644 --- a/.github/codecov.yml +++ b/.github/codecov.yml @@ -1,5 +1,5 @@ codecov: - max_report_age: off + max_report_age: "off" strict_yaml_branch: "master" ignore: @@ -14,4 +14,4 @@ ignore: comment: false github_checks: - annotations: false \ No newline at end of file + annotations: false diff --git a/.github/workflows/anchore-analysis.yml b/.github/workflows/anchore-analysis.yml index 50eaf45e2ef..1005c8f6c38 100644 --- a/.github/workflows/anchore-analysis.yml +++ b/.github/workflows/anchore-analysis.yml @@ -8,9 +8,9 @@ name: Docker Container Scan (clickhouse-server) -on: +"on": pull_request: - paths: + paths: - docker/server/Dockerfile - .github/workflows/anchore-analysis.yml schedule: @@ -20,20 +20,20 @@ jobs: Anchore-Build-Scan: runs-on: ubuntu-latest steps: - - name: Checkout the code - uses: actions/checkout@v2 - - name: Build the Docker image - run: | - cd docker/server - perl -pi -e 's|=\$version||g' Dockerfile - docker build . --file Dockerfile --tag localbuild/testimage:latest - - name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled - uses: anchore/scan-action@v2 - id: scan - with: - image: "localbuild/testimage:latest" - acs-report-enable: true - - name: Upload Anchore Scan Report - uses: github/codeql-action/upload-sarif@v1 - with: - sarif_file: ${{ steps.scan.outputs.sarif }} + - name: Checkout the code + uses: actions/checkout@v2 + - name: Build the Docker image + run: | + cd docker/server + perl -pi -e 's|=\$version||g' Dockerfile + docker build . --file Dockerfile --tag localbuild/testimage:latest + - name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled + uses: anchore/scan-action@v2 + id: scan + with: + image: "localbuild/testimage:latest" + acs-report-enable: true + - name: Upload Anchore Scan Report + uses: github/codeql-action/upload-sarif@v1 + with: + sarif_file: ${{ steps.scan.outputs.sarif }} diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index 633dd47a2d5..00000000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,32 +0,0 @@ -# See the example here: https://github.com/github/codeql-action - -name: "CodeQL Scanning" - -on: - schedule: - - cron: '0 19 * * *' -jobs: - CodeQL-Build: - - runs-on: self-hosted - timeout-minutes: 1440 - - steps: - - name: Checkout repository - uses: actions/checkout@v2 - with: - fetch-depth: 2 - submodules: 'recursive' - - - name: Initialize CodeQL - uses: github/codeql-action/init@v1 - - with: - languages: cpp - - - run: sudo apt-get update && sudo apt-get install -y git cmake python ninja-build gcc-10 g++-10 && mkdir build - - run: cd build && CC=gcc-10 CXX=g++-10 cmake .. 
- - run: cd build && ninja - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v1 diff --git a/.gitignore b/.gitignore index 1e9765dca9e..d33dbf0600d 100644 --- a/.gitignore +++ b/.gitignore @@ -137,3 +137,9 @@ website/package-lock.json /prof *.iml + +# data store +/programs/server/data +/programs/server/metadata +/programs/server/store + diff --git a/.gitmodules b/.gitmodules index ecccf0633e2..7a2c5600e65 100644 --- a/.gitmodules +++ b/.gitmodules @@ -184,7 +184,7 @@ url = https://github.com/ClickHouse-Extras/krb5 [submodule "contrib/cyrus-sasl"] path = contrib/cyrus-sasl - url = https://github.com/cyrusimap/cyrus-sasl + url = https://github.com/ClickHouse-Extras/cyrus-sasl branch = cyrus-sasl-2.1 [submodule "contrib/croaring"] path = contrib/croaring @@ -220,4 +220,4 @@ url = https://github.com/ClickHouse-Extras/boringssl.git [submodule "contrib/NuRaft"] path = contrib/NuRaft - url = https://github.com/eBay/NuRaft.git + url = https://github.com/ClickHouse-Extras/NuRaft.git diff --git a/.potato.yml b/.potato.yml index 113bdacbdde..7cb87c58bd1 100644 --- a/.potato.yml +++ b/.potato.yml @@ -14,14 +14,14 @@ handlers: # The trigger for creating the Yandex.Tracker issue. When the specified event occurs, it transfers PR data to Yandex.Tracker. github:pullRequest:labeled: data: - # The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues. - queue: CLICKHOUSEDOCS - # The issue title. - summary: '[Potato] Pull Request #{{pullRequest.number}}' - # The issue description. - description: > + # The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues. + queue: CLICKHOUSEDOCS + # The issue title. + summary: '[Potato] Pull Request #{{pullRequest.number}}' + # The issue description. + description: > {{pullRequest.description}} Ссылка на Pull Request: {{pullRequest.webUrl}} - # The condition for creating the Yandex.Tracker issue. - condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length + # The condition for creating the Yandex.Tracker issue. + condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length diff --git a/.yamllint b/.yamllint new file mode 100644 index 00000000000..fe161e71849 --- /dev/null +++ b/.yamllint @@ -0,0 +1,15 @@ +# vi: ft=yaml +extends: default + +rules: + indentation: + level: warning + indent-sequences: consistent + line-length: + # there are some bash -c "", so this is OK + max: 300 + level: warning + comments: + min-spaces-from-content: 1 + document-start: + present: false diff --git a/CHANGELOG.md b/CHANGELOG.md index b328dcf5c88..e2c777b3bcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,180 @@ +## ClickHouse release 21.2 + +### ClickHouse release v21.2.2.8-stable, 2021-02-07 + +#### Backward Incompatible Change + +* Bitwise functions (`bitAnd`, `bitOr`, etc) are forbidden for floating point arguments. Now you have to do explicit cast to integer. [#19853](https://github.com/ClickHouse/ClickHouse/pull/19853) ([Azat Khuzhin](https://github.com/azat)). +* Forbid `lcm`/`gcd` for floats. [#19532](https://github.com/ClickHouse/ClickHouse/pull/19532) ([Azat Khuzhin](https://github.com/azat)). +* Fix memory tracking for `OPTIMIZE TABLE`/merges; account query memory limits and sampling for `OPTIMIZE TABLE`/merges. [#18772](https://github.com/ClickHouse/ClickHouse/pull/18772) ([Azat Khuzhin](https://github.com/azat)). 
+* Disallow floating point column as partition key, see [#18421](https://github.com/ClickHouse/ClickHouse/issues/18421#event-4147046255). [#18464](https://github.com/ClickHouse/ClickHouse/pull/18464) ([hexiaoting](https://github.com/hexiaoting)). +* Excessive parenthesis in type definitions no longer supported, example: `Array((UInt8))`. + +#### New Feature + +* Added `PostgreSQL` table engine (both select/insert, with support for multidimensional arrays), also as table function. Added `PostgreSQL` dictionary source. Added `PostgreSQL` database engine. [#18554](https://github.com/ClickHouse/ClickHouse/pull/18554) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Data type `Nested` now supports arbitrary levels of nesting. Introduced subcolumns of complex types, such as `size0` in `Array`, `null` in `Nullable`, names of `Tuple` elements, which can be read without reading of whole column. [#17310](https://github.com/ClickHouse/ClickHouse/pull/17310) ([Anton Popov](https://github.com/CurtizJ)). +* Added `Nullable` support for `FlatDictionary`, `HashedDictionary`, `ComplexKeyHashedDictionary`, `DirectDictionary`, `ComplexKeyDirectDictionary`, `RangeHashedDictionary`. [#18236](https://github.com/ClickHouse/ClickHouse/pull/18236) ([Maksim Kita](https://github.com/kitaisreal)). +* Adds a new table called `system.distributed_ddl_queue` that displays the queries in the DDL worker queue. [#17656](https://github.com/ClickHouse/ClickHouse/pull/17656) ([Bharat Nallan](https://github.com/bharatnc)). +* Added support of mapping LDAP group names, and attribute values in general, to local roles for users from ldap user directories. [#17211](https://github.com/ClickHouse/ClickHouse/pull/17211) ([Denis Glazachev](https://github.com/traceon)). +* Support insert into table function `cluster`, and for both table functions `remote` and `cluster`, support distributing data across nodes by specify sharding key. Close [#16752](https://github.com/ClickHouse/ClickHouse/issues/16752). [#18264](https://github.com/ClickHouse/ClickHouse/pull/18264) ([flynn](https://github.com/ucasFL)). +* Add function `decodeXMLComponent` to decode characters for XML. Example: `SELECT decodeXMLComponent('Hello,"world"!')` [#17659](https://github.com/ClickHouse/ClickHouse/issues/17659). [#18542](https://github.com/ClickHouse/ClickHouse/pull/18542) ([nauta](https://github.com/nautaa)). +* Added functions `parseDateTimeBestEffortUSOrZero`, `parseDateTimeBestEffortUSOrNull`. [#19712](https://github.com/ClickHouse/ClickHouse/pull/19712) ([Maksim Kita](https://github.com/kitaisreal)). +* Add `sign` math function. [#19527](https://github.com/ClickHouse/ClickHouse/pull/19527) ([flynn](https://github.com/ucasFL)). +* Add information about used features (functions, table engines, etc) into system.query_log. [#18495](https://github.com/ClickHouse/ClickHouse/issues/18495). [#19371](https://github.com/ClickHouse/ClickHouse/pull/19371) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Function `formatDateTime` support the `%Q` modification to format date to quarter. [#19224](https://github.com/ClickHouse/ClickHouse/pull/19224) ([Jianmei Zhang](https://github.com/zhangjmruc)). +* Support MetaKey+Enter hotkey binding in play UI. [#19012](https://github.com/ClickHouse/ClickHouse/pull/19012) ([sundyli](https://github.com/sundy-li)). +* Add three functions for map data type: 1. `mapContains(map, key)` to check weather map.keys include the second parameter key. 2. `mapKeys(map)` return all the keys in Array format 3. 
`mapValues(map)` return all the values in Array format. [#18788](https://github.com/ClickHouse/ClickHouse/pull/18788) ([hexiaoting](https://github.com/hexiaoting)). +* Add `log_comment` setting related to [#18494](https://github.com/ClickHouse/ClickHouse/issues/18494). [#18549](https://github.com/ClickHouse/ClickHouse/pull/18549) ([Zijie Lu](https://github.com/TszKitLo40)). +* Add support of tuple argument to `argMin` and `argMax` functions. [#17359](https://github.com/ClickHouse/ClickHouse/pull/17359) ([Ildus Kurbangaliev](https://github.com/ildus)). +* Support `EXISTS VIEW` syntax. [#18552](https://github.com/ClickHouse/ClickHouse/pull/18552) ([Du Chuan](https://github.com/spongedu)). +* Add `SELECT ALL` syntax. closes [#18706](https://github.com/ClickHouse/ClickHouse/issues/18706). [#18723](https://github.com/ClickHouse/ClickHouse/pull/18723) ([flynn](https://github.com/ucasFL)). + +#### Performance Improvement + +* Faster parts removal by lowering the number of `stat` syscalls. This returns the optimization that existed while ago. More safe interface of `IDisk`. This closes [#19065](https://github.com/ClickHouse/ClickHouse/issues/19065). [#19086](https://github.com/ClickHouse/ClickHouse/pull/19086) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Aliases declared in `WITH` statement are properly used in index analysis. Queries like `WITH column AS alias SELECT ... WHERE alias = ...` may use index now. [#18896](https://github.com/ClickHouse/ClickHouse/pull/18896) ([Amos Bird](https://github.com/amosbird)). +* Add `optimize_alias_column_prediction` (on by default), that will: - Respect aliased columns in WHERE during partition pruning and skipping data using secondary indexes; - Respect aliased columns in WHERE for trivial count queries for optimize_trivial_count; - Respect aliased columns in GROUP BY/ORDER BY for optimize_aggregation_in_order/optimize_read_in_order. [#16995](https://github.com/ClickHouse/ClickHouse/pull/16995) ([sundyli](https://github.com/sundy-li)). +* Speed up aggregate function `sum`. Improvement only visible on synthetic benchmarks and not very practical. [#19216](https://github.com/ClickHouse/ClickHouse/pull/19216) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Update libc++ and use another ABI to provide better performance. [#18914](https://github.com/ClickHouse/ClickHouse/pull/18914) ([Danila Kutenin](https://github.com/danlark1)). +* Rewrite `sumIf()` and `sum(if())` function to `countIf()` function when logically equivalent. [#17041](https://github.com/ClickHouse/ClickHouse/pull/17041) ([flynn](https://github.com/ucasFL)). +* Use a connection pool for S3 connections, controlled by the `s3_max_connections` settings. [#13405](https://github.com/ClickHouse/ClickHouse/pull/13405) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Add support for zstd long option for better compression of string columns to save space. [#17184](https://github.com/ClickHouse/ClickHouse/pull/17184) ([ygrek](https://github.com/ygrek)). +* Slightly improve server latency by removing access to configuration on every connection. [#19863](https://github.com/ClickHouse/ClickHouse/pull/19863) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Reduce lock contention for multiple layers of the `Buffer` engine. [#19379](https://github.com/ClickHouse/ClickHouse/pull/19379) ([Azat Khuzhin](https://github.com/azat)). +* Support splitting `Filter` step of query plan into `Expression + Filter` pair. 
Together with `Expression + Expression` merging optimization ([#17458](https://github.com/ClickHouse/ClickHouse/issues/17458)) it may delay execution for some expressions after `Filter` step. [#19253](https://github.com/ClickHouse/ClickHouse/pull/19253) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### Improvement + +* `SELECT count() FROM table` now can be executed if only one any column can be selected from the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)). +* Set charset to `utf8mb4` when interacting with remote MySQL servers. Fixes [#19795](https://github.com/ClickHouse/ClickHouse/issues/19795). [#19800](https://github.com/ClickHouse/ClickHouse/pull/19800) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* `S3` table function now supports `auto` compression mode (autodetect). This closes [#18754](https://github.com/ClickHouse/ClickHouse/issues/18754). [#19793](https://github.com/ClickHouse/ClickHouse/pull/19793) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Correctly output infinite arguments for `formatReadableTimeDelta` function. In previous versions, there was implicit conversion to implementation specific integer value. [#19791](https://github.com/ClickHouse/ClickHouse/pull/19791) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Table function `S3` will use global region if the region can't be determined exactly. This closes [#10998](https://github.com/ClickHouse/ClickHouse/issues/10998). [#19750](https://github.com/ClickHouse/ClickHouse/pull/19750) ([Vladimir Chebotarev](https://github.com/excitoon)). +* In distributed queries if the setting `async_socket_for_remote` is enabled, it was possible to get stack overflow at least in debug build configuration if very deeply nested data type is used in table (e.g. `Array(Array(Array(...more...)))`). This fixes [#19108](https://github.com/ClickHouse/ClickHouse/issues/19108). This change introduces minor backward incompatibility: excessive parenthesis in type definitions no longer supported, example: `Array((UInt8))`. [#19736](https://github.com/ClickHouse/ClickHouse/pull/19736) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add separate pool for message brokers (RabbitMQ and Kafka). [#19722](https://github.com/ClickHouse/ClickHouse/pull/19722) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare `max_number_of_merges_with_ttl_in_pool` limit overrun (more merges with TTL can be assigned) for non-replicated MergeTree. [#19708](https://github.com/ClickHouse/ClickHouse/pull/19708) ([alesapin](https://github.com/alesapin)). +* Dictionary: better error message during attribute parsing. [#19678](https://github.com/ClickHouse/ClickHouse/pull/19678) ([Maksim Kita](https://github.com/kitaisreal)). +* Add an option to disable validation of checksums on reading. Should never be used in production. Please do not expect any benefits in disabling it. It may only be used for experiments and benchmarks. The setting only applicable for tables of MergeTree family. Checksums are always validated for other table engines and when receiving data over network. In my observations there is no performance difference or it is less than 0.5%. [#19588](https://github.com/ClickHouse/ClickHouse/pull/19588) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Support constant result in function `multiIf`. 
[#19533](https://github.com/ClickHouse/ClickHouse/pull/19533) ([Maksim Kita](https://github.com/kitaisreal)). +* Enable function length/empty/notEmpty for datatype Map, which returns keys number in Map. [#19530](https://github.com/ClickHouse/ClickHouse/pull/19530) ([taiyang-li](https://github.com/taiyang-li)). +* Add `--reconnect` option to `clickhouse-benchmark`. When this option is specified, it will reconnect before every request. This is needed for testing. [#19872](https://github.com/ClickHouse/ClickHouse/pull/19872) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Support using the new location of `.debug` file. This fixes [#19348](https://github.com/ClickHouse/ClickHouse/issues/19348). [#19520](https://github.com/ClickHouse/ClickHouse/pull/19520) ([Amos Bird](https://github.com/amosbird)). +* `toIPv6` function parses `IPv4` addresses. [#19518](https://github.com/ClickHouse/ClickHouse/pull/19518) ([Bharat Nallan](https://github.com/bharatnc)). +* Add `http_referer` field to `system.query_log`, `system.processes`, etc. This closes [#19389](https://github.com/ClickHouse/ClickHouse/issues/19389). [#19390](https://github.com/ClickHouse/ClickHouse/pull/19390) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Improve MySQL compatibility by making more functions case insensitive and adding aliases. [#19387](https://github.com/ClickHouse/ClickHouse/pull/19387) ([Daniil Kondratyev](https://github.com/dankondr)). +* Add metrics for MergeTree parts (Wide/Compact/InMemory) types. [#19381](https://github.com/ClickHouse/ClickHouse/pull/19381) ([Azat Khuzhin](https://github.com/azat)). +* Allow docker to be executed with arbitrary uid. [#19374](https://github.com/ClickHouse/ClickHouse/pull/19374) ([filimonov](https://github.com/filimonov)). +* Fix wrong alignment of values of `IPv4` data type in Pretty formats. They were aligned to the right, not to the left. This closes [#19184](https://github.com/ClickHouse/ClickHouse/issues/19184). [#19339](https://github.com/ClickHouse/ClickHouse/pull/19339) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow change `max_server_memory_usage` without restart. This closes [#18154](https://github.com/ClickHouse/ClickHouse/issues/18154). [#19186](https://github.com/ClickHouse/ClickHouse/pull/19186) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* The exception when function `bar` is called with certain NaN argument may be slightly misleading in previous versions. This fixes [#19088](https://github.com/ClickHouse/ClickHouse/issues/19088). [#19107](https://github.com/ClickHouse/ClickHouse/pull/19107) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)). +* Fixed `PeekableReadBuffer: Memory limit exceed` error when inserting data with huge strings. Fixes [#18690](https://github.com/ClickHouse/ClickHouse/issues/18690). [#18979](https://github.com/ClickHouse/ClickHouse/pull/18979) ([tavplubix](https://github.com/tavplubix)). +* Docker image: several improvements for clickhouse-server entrypoint. [#18954](https://github.com/ClickHouse/ClickHouse/pull/18954) ([filimonov](https://github.com/filimonov)). +* Add `normalizeQueryKeepNames` and `normalizedQueryHashKeepNames` to normalize queries without masking long names with `?`. This helps better analyze complex query logs. 
[#18910](https://github.com/ClickHouse/ClickHouse/pull/18910) ([Amos Bird](https://github.com/amosbird)). +* Check per-block checksum of the distributed batch on the sender before sending (without reading the file twice, the checksums will be verified while reading), this will avoid stuck of the INSERT on the receiver (on truncated .bin file on the sender). Avoid reading .bin files twice for batched INSERT (it was required to calculate rows/bytes to take squashing into account, now this information included into the header, backward compatible is preserved). [#18853](https://github.com/ClickHouse/ClickHouse/pull/18853) ([Azat Khuzhin](https://github.com/azat)). +* Fix issues with RIGHT and FULL JOIN of tables with aggregate function states. In previous versions exception about `cloneResized` method was thrown. [#18818](https://github.com/ClickHouse/ClickHouse/pull/18818) ([templarzq](https://github.com/templarzq)). +* Added prefix-based S3 endpoint settings. [#18812](https://github.com/ClickHouse/ClickHouse/pull/18812) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Add [UInt8, UInt16, UInt32, UInt64] arguments types support for bitmapTransform, bitmapSubsetInRange, bitmapSubsetLimit, bitmapContains functions. This closes [#18713](https://github.com/ClickHouse/ClickHouse/issues/18713). [#18791](https://github.com/ClickHouse/ClickHouse/pull/18791) ([sundyli](https://github.com/sundy-li)). +* Allow CTE (Common Table Expressions) to be further aliased. Propagate CSE (Common Subexpressions Elimination) to subqueries in the same level when `enable_global_with_statement = 1`. This fixes [#17378](https://github.com/ClickHouse/ClickHouse/issues/17378) . This fixes https://github.com/ClickHouse/ClickHouse/pull/16575#issuecomment-753416235 . [#18684](https://github.com/ClickHouse/ClickHouse/pull/18684) ([Amos Bird](https://github.com/amosbird)). +* Update librdkafka to v1.6.0-RC2. Fixes [#18668](https://github.com/ClickHouse/ClickHouse/issues/18668). [#18671](https://github.com/ClickHouse/ClickHouse/pull/18671) ([filimonov](https://github.com/filimonov)). +* In case of unexpected exceptions automatically restart background thread which is responsible for execution of distributed DDL queries. Fixes [#17991](https://github.com/ClickHouse/ClickHouse/issues/17991). [#18285](https://github.com/ClickHouse/ClickHouse/pull/18285) ([徐炘](https://github.com/weeds085490)). +* Updated AWS C++ SDK in order to utilize global regions in S3. [#17870](https://github.com/ClickHouse/ClickHouse/pull/17870) ([Vladimir Chebotarev](https://github.com/excitoon)). +* Added support for `WITH ... [AND] [PERIODIC] REFRESH [interval_in_sec]` clause when creating `LIVE VIEW` tables. [#14822](https://github.com/ClickHouse/ClickHouse/pull/14822) ([vzakaznikov](https://github.com/vzakaznikov)). +* Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix + +* Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)). +* Fix starting the server with tables having default expressions containing dictGet(). Allow getting return type of dictGet() without loading dictionary. 
[#19805](https://github.com/ClickHouse/ClickHouse/pull/19805) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix server crash after query with `if` function with `Tuple` type of then/else branches result. `Tuple` type must contain `Array` or another complex type. Fixes [#18356](https://github.com/ClickHouse/ClickHouse/issues/18356). [#20133](https://github.com/ClickHouse/ClickHouse/pull/20133) ([alesapin](https://github.com/alesapin)). +* `MaterializeMySQL` (experimental feature): Fix replication for statements that update several tables. [#20066](https://github.com/ClickHouse/ClickHouse/pull/20066) ([Håvard Kvålen](https://github.com/havardk)). +* Prevent "Connection refused" in docker during initialization script execution. [#20012](https://github.com/ClickHouse/ClickHouse/pull/20012) ([filimonov](https://github.com/filimonov)). +* `EmbeddedRocksDB` is an experimental storage. Fix the issue with lack of proper type checking. Simplified code. This closes [#19967](https://github.com/ClickHouse/ClickHouse/issues/19967). [#19972](https://github.com/ClickHouse/ClickHouse/pull/19972) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix a segfault in function `fromModifiedJulianDay` when the argument type is `Nullable(T)` for any integral types other than Int32. [#19959](https://github.com/ClickHouse/ClickHouse/pull/19959) ([PHO](https://github.com/depressed-pho)). +* The function `greatCircleAngle` returned inaccurate results in previous versions. This closes [#19769](https://github.com/ClickHouse/ClickHouse/issues/19769). [#19789](https://github.com/ClickHouse/ClickHouse/pull/19789) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix rare bug when some replicated operations (like mutation) cannot process some parts after data corruption. Fixes [#19593](https://github.com/ClickHouse/ClickHouse/issues/19593). [#19702](https://github.com/ClickHouse/ClickHouse/pull/19702) ([alesapin](https://github.com/alesapin)). +* Background thread which executes `ON CLUSTER` queries might hang waiting for dropped replicated table to do something. It's fixed. [#19684](https://github.com/ClickHouse/ClickHouse/pull/19684) ([yiguolei](https://github.com/yiguolei)). +* Fix wrong deserialization of columns description. It makes INSERT into a table with a column named `\` impossible. [#19479](https://github.com/ClickHouse/ClickHouse/pull/19479) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Mark distributed batch as broken in case of empty data block in one of files. [#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)). +* Fixed very rare bug that might cause mutation to hang after `DROP/DETACH/REPLACE/MOVE PARTITION`. It was partially fixed by [#15537](https://github.com/ClickHouse/ClickHouse/issues/15537) for the most cases. [#19443](https://github.com/ClickHouse/ClickHouse/pull/19443) ([tavplubix](https://github.com/tavplubix)). +* Fix possible error `Extremes transform was already added to pipeline`. Fixes [#14100](https://github.com/ClickHouse/ClickHouse/issues/14100). [#19430](https://github.com/ClickHouse/ClickHouse/pull/19430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix default value in join types with non-zero default (e.g. some Enums). Closes [#18197](https://github.com/ClickHouse/ClickHouse/issues/18197). [#19360](https://github.com/ClickHouse/ClickHouse/pull/19360) ([vdimir](https://github.com/vdimir)). +* Do not mark file for distributed send as broken on EOF. 
[#19290](https://github.com/ClickHouse/ClickHouse/pull/19290) ([Azat Khuzhin](https://github.com/azat)). +* Fix leaking of pipe fd for `async_socket_for_remote`. [#19153](https://github.com/ClickHouse/ClickHouse/pull/19153) ([Azat Khuzhin](https://github.com/azat)). +* Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix issue in merge tree data writer which can lead to marks with bigger size than fixed granularity size. Fixes [#18913](https://github.com/ClickHouse/ClickHouse/issues/18913). [#19123](https://github.com/ClickHouse/ClickHouse/pull/19123) ([alesapin](https://github.com/alesapin)). +* Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and throws exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)). +* Simplify the implementation of `tupleHammingDistance`. Support for tuples of any equal length. Fixes [#19029](https://github.com/ClickHouse/ClickHouse/issues/19029). [#19084](https://github.com/ClickHouse/ClickHouse/pull/19084) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix possible error `Expected single dictionary argument for function` if use function `ignore` with `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix minor issue in JOIN: Join tries to materialize const columns, but our code waits for them in other places. [#18982](https://github.com/ClickHouse/ClickHouse/pull/18982) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix possible exception `QueryPipeline stream: different number of columns` caused by merging of query plan's `Expression` steps. Fixes [#18190](https://github.com/ClickHouse/ClickHouse/issues/18190). [#18980](https://github.com/ClickHouse/ClickHouse/pull/18980) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([tavplubix](https://github.com/tavplubix)). +* Fixed rare crashes when server run out of memory. 
[#18976](https://github.com/ClickHouse/ClickHouse/pull/18976) ([tavplubix](https://github.com/tavplubix)). +* Fix incorrect behavior when `ALTER TABLE ... DROP PART 'part_name'` query removes all deduplication blocks for the whole partition. Fixes [#18874](https://github.com/ClickHouse/ClickHouse/issues/18874). [#18969](https://github.com/ClickHouse/ClickHouse/pull/18969) ([alesapin](https://github.com/alesapin)). +* Fixed issue [#18894](https://github.com/ClickHouse/ClickHouse/issues/18894) Add a check to avoid exception when long column alias('table.column' style, usually auto-generated by BI tools like Looker) equals to long table name. [#18968](https://github.com/ClickHouse/ClickHouse/pull/18968) ([Daniel Qin](https://github.com/mathfool)). +* Fix error `Task was not found in task queue` (possible only for remote queries, with `async_socket_for_remote = 1`). [#18964](https://github.com/ClickHouse/ClickHouse/pull/18964) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')` serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)). +* ATTACH PARTITION will reset mutations. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)). +* Fix issue with `bitmapOrCardinality` that may lead to nullptr dereference. This closes [#18911](https://github.com/ClickHouse/ClickHouse/issues/18911). [#18912](https://github.com/ClickHouse/ClickHouse/pull/18912) ([sundyli](https://github.com/sundy-li)). +* Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse decimal from nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)). +* Fix data type convert issue for MySQL engine. [#18124](https://github.com/ClickHouse/ClickHouse/pull/18124) ([bo zeng](https://github.com/mis98zb)). +* Fix clickhouse-client abort exception while executing only `select`. [#19790](https://github.com/ClickHouse/ClickHouse/pull/19790) ([taiyang-li](https://github.com/taiyang-li)). + + +#### Build/Testing/Packaging Improvement + +* Run [SQLancer](https://twitter.com/RiggerManuel/status/1352345625480884228) (logical SQL fuzzer) in CI. [#19006](https://github.com/ClickHouse/ClickHouse/pull/19006) ([Ilya Yatsishin](https://github.com/qoega)). +* Query Fuzzer will fuzz newly added tests more extensively. This closes [#18916](https://github.com/ClickHouse/ClickHouse/issues/18916). [#19185](https://github.com/ClickHouse/ClickHouse/pull/19185) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Integrate with [Big List of Naughty Strings](https://github.com/minimaxir/big-list-of-naughty-strings/) for better fuzzing. [#19480](https://github.com/ClickHouse/ClickHouse/pull/19480) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Add integration tests run with MSan. [#18974](https://github.com/ClickHouse/ClickHouse/pull/18974) ([alesapin](https://github.com/alesapin)). +* Fixed MemorySanitizer errors in cyrus-sasl and musl. 
[#19821](https://github.com/ClickHouse/ClickHouse/pull/19821) ([Ilya Yatsishin](https://github.com/qoega)). +* Insuffiient arguments check in `positionCaseInsensitiveUTF8` function triggered address sanitizer. [#19720](https://github.com/ClickHouse/ClickHouse/pull/19720) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Remove --project-directory for docker-compose in integration test. Fix logs formatting from docker container. [#19706](https://github.com/ClickHouse/ClickHouse/pull/19706) ([Ilya Yatsishin](https://github.com/qoega)). +* Made generation of macros.xml easier for integration tests. No more excessive logging from dicttoxml. dicttoxml project is not active for 5+ years. [#19697](https://github.com/ClickHouse/ClickHouse/pull/19697) ([Ilya Yatsishin](https://github.com/qoega)). +* Allow to explicitly enable or disable watchdog via environment variable `CLICKHOUSE_WATCHDOG_ENABLE`. By default it is enabled if server is not attached to terminal. [#19522](https://github.com/ClickHouse/ClickHouse/pull/19522) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Allow building ClickHouse with Kafka support on arm64. [#19369](https://github.com/ClickHouse/ClickHouse/pull/19369) ([filimonov](https://github.com/filimonov)). +* Allow building librdkafka without ssl. [#19337](https://github.com/ClickHouse/ClickHouse/pull/19337) ([filimonov](https://github.com/filimonov)). +* Restore Kafka input in FreeBSD builds. [#18924](https://github.com/ClickHouse/ClickHouse/pull/18924) ([Alexandre Snarskii](https://github.com/snar)). +* Fix potential nullptr dereference in table function `VALUES`. [#19357](https://github.com/ClickHouse/ClickHouse/pull/19357) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Avoid UBSan reports in `arrayElement` function, `substring` and `arraySum`. Fixes [#19305](https://github.com/ClickHouse/ClickHouse/issues/19305). Fixes [#19287](https://github.com/ClickHouse/ClickHouse/issues/19287). This closes [#19336](https://github.com/ClickHouse/ClickHouse/issues/19336). [#19347](https://github.com/ClickHouse/ClickHouse/pull/19347) ([alexey-milovidov](https://github.com/alexey-milovidov)). + + ## ClickHouse release 21.1 +### ClickHouse release v21.1.3.32-stable, 2021-02-03 + +#### Bug Fix + +* BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). [#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix crash when pushing down predicates to union distinct subquery. This fixes [#19855](https://github.com/ClickHouse/ClickHouse/issues/19855). [#19861](https://github.com/ClickHouse/ClickHouse/pull/19861) ([Amos Bird](https://github.com/amosbird)). +* Fix filtering by UInt8 greater than 127. [#19799](https://github.com/ClickHouse/ClickHouse/pull/19799) ([Anton Popov](https://github.com/CurtizJ)). +* In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed stack overflow when using accurate comparison of arithmetic type with string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([tavplubix](https://github.com/tavplubix)). +* Fix crash when nested column name was used in `WHERE` or `PREWHERE`. Fixes [#19755](https://github.com/ClickHouse/ClickHouse/issues/19755). 
[#19763](https://github.com/ClickHouse/ClickHouse/pull/19763) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)). +* Some functions with big integers may cause segfault. Big integers is experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result of function `neighbor` for `LowCardinality` argument. Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). [#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)). +* `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([tavplubix](https://github.com/tavplubix)). +* Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)). +* Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Uninitialized memory read was possible in encrypt/decrypt functions if empty string was passed as IV. This closes [#19391](https://github.com/ClickHouse/ClickHouse/issues/19391). [#19397](https://github.com/ClickHouse/ClickHouse/pull/19397) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). [#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)). +* Fixed possible wrong result or segfault on aggregation when Materialized View and its target table have different structure. Fixes [#18063](https://github.com/ClickHouse/ClickHouse/issues/18063). [#19322](https://github.com/ClickHouse/ClickHouse/pull/19322) ([tavplubix](https://github.com/tavplubix)). +* Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). 
[#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)). +* Fixed `There is no checkpoint` error when inserting data through http interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([tavplubix](https://github.com/tavplubix)). +* Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)). +* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)). + + + ### ClickHouse release v21.1.2.15-stable 2021-01-18 #### Backward Incompatible Change diff --git a/README.md b/README.md index 8e114d5abe9..3329a98877f 100644 --- a/README.md +++ b/README.md @@ -8,12 +8,8 @@ ClickHouse® is an open-source column-oriented database management system that a * [Tutorial](https://clickhouse.tech/docs/en/getting_started/tutorial/) shows how to set up and query small ClickHouse cluster. * [Documentation](https://clickhouse.tech/docs/en/) provides more in-depth information. * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. -* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-d2zxkf9e-XyxDa_ucfPxzuH4SJIm~Ng) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time. +* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-ly9m4w1x-6j7x5Ts_pQZqrctAbRZ3cg) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time. * [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events. * [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation. -* [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian. * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any. * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person. - -## Upcoming Events -* [Chinese ClickHouse Meetup (online)](http://hdxu.cn/8KxZE) on 6 February 2021. diff --git a/base/common/DateLUTImpl.h b/base/common/DateLUTImpl.h index b5d4be950b5..064787fb64e 100644 --- a/base/common/DateLUTImpl.h +++ b/base/common/DateLUTImpl.h @@ -7,6 +7,7 @@ #include #include + #define DATE_LUT_MAX (0xFFFFFFFFU - 86400) #define DATE_LUT_MAX_DAY_NUM (0xFFFFFFFFU / 86400) /// Table size is bigger than DATE_LUT_MAX_DAY_NUM to fill all indices within UInt16 range: this allows to remove extra check. 
@@ -249,7 +250,7 @@ public: { DayNum index = findIndex(t); - if (unlikely(index == 0)) + if (unlikely(index == 0 || index > DATE_LUT_MAX_DAY_NUM)) return t + offset_at_start_of_epoch; time_t res = t - lut[index].date; @@ -264,18 +265,43 @@ public: { DayNum index = findIndex(t); - /// If it is not 1970 year (findIndex found nothing appropriate), - /// than limit number of hours to avoid insane results like 1970-01-01 89:28:15 - if (unlikely(index == 0)) + /// If it is overflow case, + /// then limit number of hours to avoid insane results like 1970-01-01 89:28:15 + if (unlikely(index == 0 || index > DATE_LUT_MAX_DAY_NUM)) return static_cast((t + offset_at_start_of_epoch) / 3600) % 24; - time_t res = t - lut[index].date; + time_t time = t - lut[index].date; - /// Data is cleaned to avoid possibility of underflow. - if (res >= lut[index].time_at_offset_change) + if (time >= lut[index].time_at_offset_change) + time += lut[index].amount_of_offset_change; + + unsigned res = time / 3600; + return res <= 23 ? res : 0; + } + + /** Calculating offset from UTC in seconds. + * which means Using the same literal time of "t" to get the corresponding timestamp in UTC, + * then subtract the former from the latter to get the offset result. + * The boundaries when meets DST(daylight saving time) change should be handled very carefully. + */ + inline time_t timezoneOffset(time_t t) const + { + DayNum index = findIndex(t); + + /// Calculate daylight saving offset first. + /// Because the "amount_of_offset_change" in LUT entry only exists in the change day, it's costly to scan it from the very begin. + /// but we can figure out all the accumulated offsets from 1970-01-01 to that day just by get the whole difference between lut[].date, + /// and then, we can directly subtract multiple 86400s to get the real DST offsets for the leap seconds is not considered now. + time_t res = (lut[index].date - lut[0].date) % 86400; + /// As so far to know, the maximal DST offset couldn't be more than 2 hours, so after the modulo operation the remainder + /// will sits between [-offset --> 0 --> offset] which respectively corresponds to moving clock forward or backward. + res = res > 43200 ? (86400 - res) : (0 - res); + + /// Check if has a offset change during this day. Add the change when cross the line + if (lut[index].amount_of_offset_change != 0 && t >= lut[index].date + lut[index].time_at_offset_change) res += lut[index].amount_of_offset_change; - return res / 3600; + return res + offset_at_start_of_epoch; } /** Only for time zones with/when offset from UTC is multiple of five minutes. @@ -289,12 +315,12 @@ public: * each minute, with added or subtracted leap second, spans exactly 60 unix timestamps. */ - inline unsigned toSecond(time_t t) const { return t % 60; } + inline unsigned toSecond(time_t t) const { return UInt32(t) % 60; } inline unsigned toMinute(time_t t) const { if (offset_is_whole_number_of_hours_everytime) - return (t / 60) % 60; + return (UInt32(t) / 60) % 60; UInt32 date = find(t).date; return (UInt32(t) - date) / 60 % 60; @@ -530,9 +556,7 @@ public: } } - /* - * check and change mode to effective - */ + /// Check and change mode to effective. inline UInt8 check_week_mode(UInt8 mode) const { UInt8 week_format = (mode & 7); @@ -541,10 +565,9 @@ public: return week_format; } - /* - * Calc weekday from d - * Returns 0 for monday, 1 for tuesday ... - */ + /** Calculate weekday from d. + * Returns 0 for monday, 1 for tuesday... 
+ */ inline unsigned calc_weekday(DayNum d, bool sunday_first_day_of_week) const { if (!sunday_first_day_of_week) @@ -553,7 +576,7 @@ public: return toDayOfWeek(DayNum(d + 1)) - 1; } - /* Calc days in one year. */ + /// Calculate days in one year. inline unsigned calc_days_in_year(UInt16 year) const { return ((year & 3) == 0 && (year % 100 || (year % 400 == 0 && year)) ? 366 : 365); diff --git a/base/common/LocalDate.h b/base/common/LocalDate.h index a063d6e98a3..e5ebe877bc5 100644 --- a/base/common/LocalDate.h +++ b/base/common/LocalDate.h @@ -168,14 +168,6 @@ public: static_assert(sizeof(LocalDate) == 4); -inline std::ostream & operator<< (std::ostream & ostr, const LocalDate & date) -{ - return ostr << date.year() - << '-' << (date.month() / 10) << (date.month() % 10) - << '-' << (date.day() / 10) << (date.day() % 10); -} - - namespace std { inline string to_string(const LocalDate & date) diff --git a/base/common/LocalDateTime.h b/base/common/LocalDateTime.h index d19d862f2ca..0e237789bd1 100644 --- a/base/common/LocalDateTime.h +++ b/base/common/LocalDateTime.h @@ -169,20 +169,6 @@ public: static_assert(sizeof(LocalDateTime) == 8); -inline std::ostream & operator<< (std::ostream & ostr, const LocalDateTime & datetime) -{ - ostr << std::setfill('0') << std::setw(4) << datetime.year(); - - ostr << '-' << (datetime.month() / 10) << (datetime.month() % 10) - << '-' << (datetime.day() / 10) << (datetime.day() % 10) - << ' ' << (datetime.hour() / 10) << (datetime.hour() % 10) - << ':' << (datetime.minute() / 10) << (datetime.minute() % 10) - << ':' << (datetime.second() / 10) << (datetime.second() % 10); - - return ostr; -} - - namespace std { inline string to_string(const LocalDateTime & datetime) diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp index 28c7990c353..fcd1610e589 100644 --- a/base/common/ReplxxLineReader.cpp +++ b/base/common/ReplxxLineReader.cpp @@ -12,6 +12,8 @@ #include #include #include +#include + namespace { @@ -189,8 +191,8 @@ void ReplxxLineReader::openEditor() return; } - String editor = std::getenv("EDITOR"); - if (editor.empty()) + const char * editor = std::getenv("EDITOR"); + if (!editor || !*editor) editor = "vim"; replxx::Replxx::State state(rx.get_state()); @@ -204,7 +206,7 @@ void ReplxxLineReader::openEditor() if ((-1 == res || 0 == res) && errno != EINTR) { rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString(errno).c_str()); - return; + break; } bytes_written += res; } @@ -215,7 +217,7 @@ void ReplxxLineReader::openEditor() return; } - if (0 == execute(editor + " " + filename)) + if (0 == execute(fmt::format("{} {}", editor, filename))) { try { diff --git a/base/common/arithmeticOverflow.h b/base/common/arithmeticOverflow.h index 8df037a14af..a92fe56b9cb 100644 --- a/base/common/arithmeticOverflow.h +++ b/base/common/arithmeticOverflow.h @@ -1,9 +1,30 @@ #pragma once #include +#include + namespace common { + /// Multiply and ignore overflow. 
+ template + inline auto NO_SANITIZE_UNDEFINED mulIgnoreOverflow(T1 x, T2 y) + { + return x * y; + } + + template + inline auto NO_SANITIZE_UNDEFINED addIgnoreOverflow(T1 x, T2 y) + { + return x + y; + } + + template + inline auto NO_SANITIZE_UNDEFINED subIgnoreOverflow(T1 x, T2 y) + { + return x - y; + } + template inline bool addOverflow(T x, T y, T & res) { @@ -33,14 +54,14 @@ namespace common { static constexpr __int128 min_int128 = minInt128(); static constexpr __int128 max_int128 = maxInt128(); - res = x + y; + res = addIgnoreOverflow(x, y); return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y); } template <> inline bool addOverflow(wInt256 x, wInt256 y, wInt256 & res) { - res = x + y; + res = addIgnoreOverflow(x, y); return (y > 0 && x > std::numeric_limits::max() - y) || (y < 0 && x < std::numeric_limits::min() - y); } @@ -48,7 +69,7 @@ namespace common template <> inline bool addOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) { - res = x + y; + res = addIgnoreOverflow(x, y); return x > std::numeric_limits::max() - y; } @@ -81,14 +102,14 @@ namespace common { static constexpr __int128 min_int128 = minInt128(); static constexpr __int128 max_int128 = maxInt128(); - res = x - y; + res = subIgnoreOverflow(x, y); return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y); } template <> inline bool subOverflow(wInt256 x, wInt256 y, wInt256 & res) { - res = x - y; + res = subIgnoreOverflow(x, y); return (y < 0 && x > std::numeric_limits::max() + y) || (y > 0 && x < std::numeric_limits::min() + y); } @@ -96,7 +117,7 @@ namespace common template <> inline bool subOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) { - res = x - y; + res = subIgnoreOverflow(x, y); return x < y; } @@ -127,33 +148,33 @@ namespace common template <> inline bool mulOverflow(__int128 x, __int128 y, __int128 & res) { - res = static_cast(x) * static_cast(y); /// Avoid signed integer overflow. + res = mulIgnoreOverflow(x, y); if (!x || !y) return false; unsigned __int128 a = (x > 0) ? x : -x; unsigned __int128 b = (y > 0) ? y : -y; - return (a * b) / b != a; + return mulIgnoreOverflow(a, b) / b != a; } template <> inline bool mulOverflow(wInt256 x, wInt256 y, wInt256 & res) { - res = x * y; + res = mulIgnoreOverflow(x, y); if (!x || !y) return false; wInt256 a = (x > 0) ? x : -x; wInt256 b = (y > 0) ? y : -y; - return (a * b) / b != a; + return mulIgnoreOverflow(a, b) / b != a; } template <> inline bool mulOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) { - res = x * y; + res = mulIgnoreOverflow(x, y); if (!x || !y) return false; - return (x * y) / y != x; + return res / y != x; } } diff --git a/base/common/defines.h b/base/common/defines.h index 39df4698b88..845a53179ef 100644 --- a/base/common/defines.h +++ b/base/common/defines.h @@ -84,10 +84,12 @@ # define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined"))) # define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address"))) # define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread"))) +# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined"))) #else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it. # define NO_SANITIZE_UNDEFINED # define NO_SANITIZE_ADDRESS # define NO_SANITIZE_THREAD +# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE #endif /// A template function for suppressing warnings about unused variables or function results. 
diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 4cf8a8d7ce9..83384038b7c 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -152,7 +152,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context) if (sig != SIGTSTP) /// This signal is used for debugging. { /// The time that is usually enough for separate thread to print info into log. - sleepForSeconds(10); + sleepForSeconds(20); /// FIXME: use some feedback from threads that process stacktrace call_default_signal_handler(sig); } @@ -230,10 +230,10 @@ public: } else { - siginfo_t info; - ucontext_t context; + siginfo_t info{}; + ucontext_t context{}; StackTrace stack_trace(NoCapture{}); - UInt32 thread_num; + UInt32 thread_num{}; std::string query_id; DB::ThreadStatus * thread_ptr{}; @@ -311,7 +311,8 @@ private: if (stack_trace.getSize()) { /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace. - /// NOTE This still require memory allocations and mutex lock inside logger. BTW we can also print it to stderr using write syscalls. + /// NOTE: This still require memory allocations and mutex lock inside logger. + /// BTW we can also print it to stderr using write syscalls. std::stringstream bare_stacktrace; bare_stacktrace << "Stack trace:"; @@ -324,7 +325,7 @@ private: /// Write symbolized stack trace line by line for better grep-ability. stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); }); -#if defined(__linux__) +#if defined(OS_LINUX) /// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace. String calculated_binary_hash = getHashOfLoadedBinaryHex(); if (daemon.stored_binary_hash.empty()) @@ -415,7 +416,9 @@ static void sanitizerDeathCallback() else log_message = "Terminate called without an active exception"; - static const size_t buf_size = 1024; + /// POSIX.1 says that write(2)s of less than PIPE_BUF bytes must be atomic - man 7 pipe + /// And the buffer should not be too small because our exception messages can be large. + static constexpr size_t buf_size = PIPE_BUF; if (log_message.size() > buf_size - 16) log_message.resize(buf_size - 16); @@ -561,6 +564,7 @@ void debugIncreaseOOMScore() { DB::WriteBufferFromFile buf("/proc/self/oom_score_adj"); buf.write(new_score.c_str(), new_score.size()); + buf.close(); } catch (const Poco::Exception & e) { @@ -783,7 +787,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing() /// Setup signal handlers. /// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime. - addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP}, signalHandler, &handled_signals); + addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, &handled_signals); addSignalHandler({SIGHUP, SIGUSR1}, closeLogsSignalHandler, &handled_signals); addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals); @@ -986,7 +990,7 @@ void BaseDaemon::setupWatchdog() if (errno == ECHILD) { logger().information("Child process no longer exists."); - _exit(status); + _exit(WEXITSTATUS(status)); } if (WIFEXITED(status)) @@ -1020,7 +1024,7 @@ void BaseDaemon::setupWatchdog() /// Automatic restart is not enabled but you can play with it. 
#if 1 - _exit(status); + _exit(WEXITSTATUS(status)); #else logger().information("Will restart."); if (argv0) diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index 42d94629ae9..8b9d765cf2e 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -83,7 +83,7 @@ public: template void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "") { - auto writer = getGraphiteWriter(config_name); + auto *writer = getGraphiteWriter(config_name); if (writer) writer->write(key, value, timestamp, custom_root_path); } @@ -91,7 +91,7 @@ public: template void writeToGraphite(const GraphiteWriter::KeyValueVector & key_vals, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "") { - auto writer = getGraphiteWriter(config_name); + auto *writer = getGraphiteWriter(config_name); if (writer) writer->write(key_vals, timestamp, custom_root_path); } @@ -99,7 +99,7 @@ public: template void writeToGraphite(const GraphiteWriter::KeyValueVector & key_vals, const std::chrono::system_clock::time_point & current_time, const std::string & custom_root_path) { - auto writer = getGraphiteWriter(); + auto *writer = getGraphiteWriter(); if (writer) writer->write(key_vals, std::chrono::system_clock::to_time_t(current_time), custom_root_path); } diff --git a/base/glibc-compatibility/musl/sched_getcpu.c b/base/glibc-compatibility/musl/sched_getcpu.c index 57b8b416043..f290f01d153 100644 --- a/base/glibc-compatibility/musl/sched_getcpu.c +++ b/base/glibc-compatibility/musl/sched_getcpu.c @@ -31,7 +31,7 @@ static void *volatile vdso_func = (void *)getcpu_init; int sched_getcpu(void) { int r; - unsigned cpu; + unsigned cpu = 0; #ifdef VDSO_GETCPU_SYM getcpu_f f = (getcpu_f)vdso_func; diff --git a/base/mysqlxx/CMakeLists.txt b/base/mysqlxx/CMakeLists.txt index b410c38cfad..849c58a8527 100644 --- a/base/mysqlxx/CMakeLists.txt +++ b/base/mysqlxx/CMakeLists.txt @@ -3,7 +3,6 @@ add_library (mysqlxx Exception.cpp Query.cpp ResultBase.cpp - StoreQueryResult.cpp UseQueryResult.cpp Row.cpp Value.cpp diff --git a/base/mysqlxx/Connection.cpp b/base/mysqlxx/Connection.cpp index 55757008562..8a15115cb06 100644 --- a/base/mysqlxx/Connection.cpp +++ b/base/mysqlxx/Connection.cpp @@ -116,8 +116,8 @@ void Connection::connect(const char* db, if (!mysql_real_connect(driver.get(), server, user, password, db, port, ifNotEmpty(socket), driver->client_flag)) throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get())); - /// Sets UTF-8 as default encoding. - if (mysql_set_character_set(driver.get(), "UTF8")) + /// Sets UTF-8 as default encoding. See https://mariadb.com/kb/en/mysql_set_character_set/ + if (mysql_set_character_set(driver.get(), "utf8mb4")) throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get())); is_connected = true; diff --git a/base/mysqlxx/Connection.h b/base/mysqlxx/Connection.h index 0e5a608108c..ca67db0e0c6 100644 --- a/base/mysqlxx/Connection.h +++ b/base/mysqlxx/Connection.h @@ -39,7 +39,6 @@ private: /** MySQL connection. 
* Usage: * mysqlxx::Connection connection("Test", "127.0.0.1", "root", "qwerty", 3306); - * std::cout << connection.query("SELECT 'Hello, World!'").store().at(0).at(0).getString() << std::endl; * * Or with Poco library configuration: * mysqlxx::Connection connection("mysql_params"); diff --git a/base/mysqlxx/Query.cpp b/base/mysqlxx/Query.cpp index ab9bb174d4a..f3485c54edc 100644 --- a/base/mysqlxx/Query.cpp +++ b/base/mysqlxx/Query.cpp @@ -71,16 +71,6 @@ UseQueryResult Query::use() return UseQueryResult(res, conn, this); } -StoreQueryResult Query::store() -{ - executeImpl(); - MYSQL_RES * res = mysql_store_result(conn->getDriver()); - if (!res) - checkError(conn->getDriver()); - - return StoreQueryResult(res, conn, this); -} - void Query::execute() { executeImpl(); diff --git a/base/mysqlxx/Query.h b/base/mysqlxx/Query.h index 1d3ab9678d5..036e8952bc3 100644 --- a/base/mysqlxx/Query.h +++ b/base/mysqlxx/Query.h @@ -3,7 +3,6 @@ #include #include -#include namespace mysqlxx @@ -46,11 +45,6 @@ public: */ UseQueryResult use(); - /** Выполнить запрос с загрузкой на клиента всех строк. - * Требуется оперативка, чтобы вместить весь результат, зато к строкам можно обращаться в произвольном порядке. - */ - StoreQueryResult store(); - /// Значение auto increment после последнего INSERT-а. UInt64 insertID(); diff --git a/base/mysqlxx/ResultBase.h b/base/mysqlxx/ResultBase.h index 4f2ab2eb0a2..d08922a269c 100644 --- a/base/mysqlxx/ResultBase.h +++ b/base/mysqlxx/ResultBase.h @@ -9,7 +9,7 @@ class Connection; class Query; -/** Базовый класс для UseQueryResult и StoreQueryResult. +/** Базовый класс для UseQueryResult. * Содержит общую часть реализации, * Ссылается на Connection. Если уничтожить Connection, то пользоваться ResultBase и любым результатом нельзя. * Использовать объект можно только для результата одного запроса! diff --git a/base/mysqlxx/Row.h b/base/mysqlxx/Row.h index a0b88638546..d668fdbd29a 100644 --- a/base/mysqlxx/Row.h +++ b/base/mysqlxx/Row.h @@ -35,7 +35,7 @@ public: { } - /** Для того, чтобы создать Row, используйте соответствующие методы UseQueryResult или StoreQueryResult. */ + /** Для того, чтобы создать Row, используйте соответствующие методы UseQueryResult. */ Row(MYSQL_ROW row_, ResultBase * res_, MYSQL_LENGTHS lengths_) : row(row_), res(res_), lengths(lengths_) { diff --git a/base/mysqlxx/StoreQueryResult.cpp b/base/mysqlxx/StoreQueryResult.cpp deleted file mode 100644 index 620ed8def56..00000000000 --- a/base/mysqlxx/StoreQueryResult.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#if __has_include() -#include -#else -#include -#endif - -#include -#include - - -namespace mysqlxx -{ - -StoreQueryResult::StoreQueryResult(MYSQL_RES * res_, Connection * conn_, const Query * query_) : ResultBase(res_, conn_, query_) -{ - UInt64 rows = mysql_num_rows(res); - reserve(rows); - lengths.resize(rows * num_fields); - - for (UInt64 i = 0; MYSQL_ROW row = mysql_fetch_row(res); ++i) - { - MYSQL_LENGTHS lengths_for_row = mysql_fetch_lengths(res); - memcpy(&lengths[i * num_fields], lengths_for_row, sizeof(lengths[0]) * num_fields); - - push_back(Row(row, this, &lengths[i * num_fields])); - } - checkError(conn->getDriver()); -} - -} diff --git a/base/mysqlxx/StoreQueryResult.h b/base/mysqlxx/StoreQueryResult.h deleted file mode 100644 index 9c242d2782f..00000000000 --- a/base/mysqlxx/StoreQueryResult.h +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once - -#include - -#include -#include - - -namespace mysqlxx -{ - -class Connection; - - -/** Результат выполнения запроса, загруженный полностью на клиента. 
- * Это требует оперативку, чтобы вместить весь результат, - * но зато реализует произвольный доступ к строкам по индексу. - * Если размер результата большой - используйте лучше UseQueryResult. - * Объект содержит ссылку на Connection. - * Если уничтожить Connection, то объект становится некорректным и все строки результата - тоже. - * Если задать следующий запрос в соединении, то объект и все строки тоже становятся некорректными. - * Использовать объект можно только для результата одного запроса! - * (При попытке присвоить объекту результат следующего запроса - UB.) - */ -class StoreQueryResult : public std::vector, public ResultBase -{ -public: - StoreQueryResult(MYSQL_RES * res_, Connection * conn_, const Query * query_); - - size_t num_rows() const { return size(); } - -private: - - /** Не смотря на то, что весь результат выполнения запроса загружается на клиента, - * и все указатели MYSQL_ROW на отдельные строки различные, - * при этом функция mysql_fetch_lengths() возвращает длины - * для текущей строки по одному и тому же адресу. - * То есть, чтобы можно было пользоваться несколькими Row одновременно, - * необходимо заранее куда-то сложить все длины. - */ - using Lengths = std::vector; - Lengths lengths; -}; - -} diff --git a/base/mysqlxx/UseQueryResult.h b/base/mysqlxx/UseQueryResult.h index 3a641020dcf..37cbbd19669 100644 --- a/base/mysqlxx/UseQueryResult.h +++ b/base/mysqlxx/UseQueryResult.h @@ -12,8 +12,7 @@ class Connection; /** Результат выполнения запроса, предназначенный для чтения строк, одна за другой. * В памяти при этом хранится только одна, текущая строка. - * В отличие от StoreQueryResult, произвольный доступ к строкам невозможен, - * а также, при чтении следующей строки, предыдущая становится некорректной. + * При чтении следующей строки, предыдущая становится некорректной. * Вы обязаны прочитать все строки из результата * (вызывать функцию fetch(), пока она не вернёт значение, преобразующееся к false), * иначе при следующем запросе будет выкинуто исключение с текстом "Commands out of sync". diff --git a/base/mysqlxx/Value.h b/base/mysqlxx/Value.h index dfa86e8aa7d..57cfd452045 100644 --- a/base/mysqlxx/Value.h +++ b/base/mysqlxx/Value.h @@ -25,7 +25,7 @@ class ResultBase; /** Represents a single value read from MySQL. * It doesn't owns the value. It's just a wrapper of a pair (const char *, size_t). - * If the UseQueryResult/StoreQueryResult or Connection is destroyed, + * If the UseQueryResult or Connection is destroyed, * or you have read the next Row while using UseQueryResult, then the object is invalidated. * Allows to transform (parse) the value to various data types: * - with getUInt(), getString(), ... 
(recommended); diff --git a/base/mysqlxx/tests/mysqlxx_test.cpp b/base/mysqlxx/tests/mysqlxx_test.cpp index cf304a5cb5f..c505d34a58d 100644 --- a/base/mysqlxx/tests/mysqlxx_test.cpp +++ b/base/mysqlxx/tests/mysqlxx_test.cpp @@ -38,15 +38,6 @@ int main(int, char **) } } - { - mysqlxx::Query query = connection.query(); - query << "SELECT 1234567890 abc, 12345.67890 def UNION ALL SELECT 9876543210, 98765.43210"; - mysqlxx::StoreQueryResult result = query.store(); - - std::cerr << result.at(0)["abc"].getUInt() << ", " << result.at(0)["def"].getDouble() << std::endl - << result.at(1)["abc"].getUInt() << ", " << result.at(1)["def"].getDouble() << std::endl; - } - { mysqlxx::UseQueryResult result = connection.query("SELECT 'abc\\\\def' x").use(); mysqlxx::Row row = result.fetch(); @@ -54,27 +45,6 @@ int main(int, char **) std::cerr << row << std::endl; } - { - mysqlxx::Query query = connection.query("SEL"); - query << "ECT 1"; - - std::cerr << query.store().at(0).at(0) << std::endl; - } - - { - /// Копирование Query - mysqlxx::Query query = connection.query("SELECT 'Ok' x"); - using Queries = std::vector; - Queries queries; - queries.push_back(query); - - for (auto & q : queries) - { - std::cerr << q.str() << std::endl; - std::cerr << q.store().at(0) << std::endl; - } - } - { /// Копирование Query mysqlxx::Query query1 = connection.query("SELECT"); @@ -84,62 +54,6 @@ int main(int, char **) std::cerr << query1.str() << ", " << query2.str() << std::endl; } - { - /// Копирование Query - using Queries = std::list; - Queries queries; - queries.push_back(connection.query("SELECT")); - mysqlxx::Query & qref = queries.back(); - qref << " 1"; - - for (auto & query : queries) - { - std::cerr << query.str() << std::endl; - std::cerr << query.store().at(0) << std::endl; - } - } - - { - /// Транзакции - connection.query("DROP TABLE IF EXISTS tmp").execute(); - connection.query("CREATE TABLE tmp (x INT, PRIMARY KEY (x)) ENGINE = InnoDB").execute(); - - mysqlxx::Transaction trans(connection); - connection.query("INSERT INTO tmp VALUES (1)").execute(); - - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - - trans.rollback(); - - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - } - - { - /// Транзакции - connection.query("DROP TABLE IF EXISTS tmp").execute(); - connection.query("CREATE TABLE tmp (x INT, PRIMARY KEY (x)) ENGINE = InnoDB").execute(); - - { - mysqlxx::Transaction trans(connection); - connection.query("INSERT INTO tmp VALUES (1)").execute(); - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - } - - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - } - - { - /// Транзакции - mysqlxx::Connection connection2("test", "127.0.0.1", "root", "qwerty", 3306); - connection2.query("DROP TABLE IF EXISTS tmp").execute(); - connection2.query("CREATE TABLE tmp (x INT, PRIMARY KEY (x)) ENGINE = InnoDB").execute(); - - mysqlxx::Transaction trans(connection2); - connection2.query("INSERT INTO tmp VALUES (1)").execute(); - std::cerr << connection2.query("SELECT * FROM tmp").store().size() << std::endl; - } - std::cerr << connection.query("SELECT * FROM tmp").store().size() << std::endl; - { /// NULL mysqlxx::Null x = mysqlxx::null; @@ -152,59 +66,6 @@ int main(int, char **) std::cerr << (x == 1 ? "Ok" : "Fail") << std::endl; std::cerr << (x.isNull() ? 
"Fail" : "Ok") << std::endl; } - - { - /// Исключения при попытке достать значение не того типа - try - { - connection.query("SELECT -1").store().at(0).at(0).getUInt(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - - try - { - connection.query("SELECT 'xxx'").store().at(0).at(0).getInt(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - - try - { - connection.query("SELECT NULL").store().at(0).at(0).getString(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - - try - { - connection.query("SELECT 123").store().at(0).at(0).getDate(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - - try - { - connection.query("SELECT '2011-01-01'").store().at(0).at(0).getDateTime(); - std::cerr << "Fail" << std::endl; - } - catch (const mysqlxx::Exception & e) - { - std::cerr << "Ok, " << e.message() << std::endl; - } - } } catch (const mysqlxx::Exception & e) { diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 5d643cc4bee..ce92ae203ea 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -1,9 +1,9 @@ # This strings autochanged from release_lib.sh: -SET(VERSION_REVISION 54447) +SET(VERSION_REVISION 54448) SET(VERSION_MAJOR 21) -SET(VERSION_MINOR 2) +SET(VERSION_MINOR 3) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 53d0c9fa7255aa1dc48991d19f4246ff71cc2fd7) -SET(VERSION_DESCRIBE v21.2.1.1-prestable) -SET(VERSION_STRING 21.2.1.1) +SET(VERSION_GITHASH ef72ba7349f230321750c13ee63b49a11a7c0adc) +SET(VERSION_DESCRIBE v21.3.1.1-prestable) +SET(VERSION_STRING 21.3.1.1) # end of autochange diff --git a/cmake/find/ccache.cmake b/cmake/find/ccache.cmake index d8e9cf9588d..d9ccd1a9ac6 100644 --- a/cmake/find/ccache.cmake +++ b/cmake/find/ccache.cmake @@ -37,15 +37,13 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE) # # - 4.0+ ccache always includes this environment variable into the hash # of the manifest, which do not allow to use previous cache, - # - 4.2+ ccache ignores SOURCE_DATE_EPOCH under time_macros sloppiness. + # - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__ # # So for: - # - 4.2+ time_macros sloppiness is used, + # - 4.2+ does not require any sloppiness # - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable. 
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2") - message(STATUS "Use time_macros sloppiness for ccache") - set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND} --set-config=sloppiness=time_macros") - set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "${CCACHE_FOUND} --set-config=sloppiness=time_macros") + message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required") elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0") message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}") diff --git a/cmake/find/nuraft.cmake b/cmake/find/nuraft.cmake index d31fe9c1de8..7fa5251946e 100644 --- a/cmake/find/nuraft.cmake +++ b/cmake/find/nuraft.cmake @@ -11,7 +11,7 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/NuRaft/CMakeLists.txt") return() endif () -if (NOT OS_FREEBSD) +if (NOT OS_FREEBSD AND NOT OS_DARWIN) set (USE_NURAFT 1) set (NURAFT_LIBRARY nuraft) @@ -20,5 +20,5 @@ if (NOT OS_FREEBSD) message (STATUS "Using NuRaft=${USE_NURAFT}: ${NURAFT_INCLUDE_DIR} : ${NURAFT_LIBRARY}") else() set (USE_NURAFT 0) - message (STATUS "Using internal NuRaft library on FreeBSD is not supported") + message (STATUS "Using internal NuRaft library on FreeBSD and Darwin is not supported") endif() diff --git a/contrib/NuRaft b/contrib/NuRaft index 410bd149da8..7adf7ae33e7 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 410bd149da84cdde60b4436b02b738749f4e87e1 +Subproject commit 7adf7ae33e7d5c307342431b577c8ab1025ee793 diff --git a/contrib/base64-cmake/CMakeLists.txt b/contrib/base64-cmake/CMakeLists.txt index 63b4e324d29..a295ee45b84 100644 --- a/contrib/base64-cmake/CMakeLists.txt +++ b/contrib/base64-cmake/CMakeLists.txt @@ -11,7 +11,7 @@ endif () target_compile_options(base64_scalar PRIVATE -falign-loops) if (ARCH_AMD64) - target_compile_options(base64_ssse3 PRIVATE -mssse3 -falign-loops) + target_compile_options(base64_ssse3 PRIVATE -mno-avx -mno-avx2 -mssse3 -falign-loops) target_compile_options(base64_avx PRIVATE -falign-loops -mavx) target_compile_options(base64_avx2 PRIVATE -falign-loops -mavx2) else () diff --git a/contrib/boost b/contrib/boost index 8e259cd2a6b..48f40ebb539 160000 --- a/contrib/boost +++ b/contrib/boost @@ -1 +1 @@ -Subproject commit 8e259cd2a6b60d75dd17e73432f11bb7b9351bb1 +Subproject commit 48f40ebb539220d328958f8823b094c0b07a4e79 diff --git a/contrib/hyperscan b/contrib/hyperscan index 3907fd00ee8..e9f08df0213 160000 --- a/contrib/hyperscan +++ b/contrib/hyperscan @@ -1 +1 @@ -Subproject commit 3907fd00ee8b2538739768fa9533f8635a276531 +Subproject commit e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa diff --git a/contrib/hyperscan-cmake/CMakeLists.txt b/contrib/hyperscan-cmake/CMakeLists.txt index c44214cded8..75c45ff7bf5 100644 --- a/contrib/hyperscan-cmake/CMakeLists.txt +++ b/contrib/hyperscan-cmake/CMakeLists.txt @@ -252,6 +252,7 @@ if (NOT EXTERNAL_HYPERSCAN_LIBRARY_FOUND) target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1) target_compile_options (hyperscan PRIVATE -g0 # Library has too much debug information + -mno-avx -mno-avx2 # The library is using dynamic dispatch and is confused if AVX is enabled globally -march=corei7 -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # The options from original build system -fno-sanitize=undefined # Assume the library takes care of itself ) diff --git a/contrib/nuraft-cmake/CMakeLists.txt b/contrib/nuraft-cmake/CMakeLists.txt index e5bb7f7d11b..83137fe73bf 100644 --- 
a/contrib/nuraft-cmake/CMakeLists.txt +++ b/contrib/nuraft-cmake/CMakeLists.txt @@ -30,7 +30,12 @@ set(SRCS add_library(nuraft ${SRCS}) -target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1) + +if (NOT OPENSSL_SSL_LIBRARY OR NOT OPENSSL_CRYPTO_LIBRARY) + target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1 SSL_LIBRARY_NOT_FOUND=1) +else() + target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1) +endif() target_include_directories (nuraft SYSTEM PRIVATE ${LIBRARY_DIR}/include/libnuraft) # for some reason include "asio.h" directly without "boost/" prefix. diff --git a/contrib/poco b/contrib/poco index 2c32e17c7df..fbaaba4a02e 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 2c32e17c7dfee1f8bf24227b697cdef5fddf0823 +Subproject commit fbaaba4a02e29987b8c584747a496c79528f125f diff --git a/debian/changelog b/debian/changelog index 1cec020f026..53b36cae114 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (21.2.1.1) unstable; urgency=low +clickhouse (21.3.1.1) unstable; urgency=low * Modified source code - -- clickhouse-release Mon, 11 Jan 2021 11:12:08 +0300 + -- clickhouse-release Mon, 01 Feb 2021 12:50:53 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 5022687c47b..43921a4d3c4 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.2.1.* +ARG version=21.3.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 3528ae68ef6..8e39af5646c 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.2.1.* +ARG version=21.3.1.* ARG gosu_ver=1.10 # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/README.md b/docker/server/README.md index d8e9204dffa..6f799d68185 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -56,7 +56,7 @@ $ echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @- 20.12.3.3 ``` -### Volumes +### Volumes Typically you may want to mount the following folders inside your container to archieve persistency: @@ -76,7 +76,7 @@ You may also want to mount: * `/etc/clickhouse-server/usert.d/*.xml` - files with use settings adjustmenets * `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below). -### Linux capabilities +### Linux capabilities ClickHouse has some advanced functionality which requite enabling several [linux capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html). @@ -113,10 +113,10 @@ $ docker run --rm -e CLICKHOUSE_UID=0 -e CLICKHOUSE_GID=0 --name clickhouse-serv ### How to create default database and user on starting -Sometimes you may want to create user (user named `default` is used by default) and database on image starting. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD`: +Sometimes you may want to create user (user named `default` is used by default) and database on image starting. 
You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER`, `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT` and `CLICKHOUSE_PASSWORD`: ``` -$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server +$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server ``` ## How to extend this image diff --git a/docker/server/alpine-build.sh b/docker/server/alpine-build.sh index 0142149b5bd..329888f2fcb 100755 --- a/docker/server/alpine-build.sh +++ b/docker/server/alpine-build.sh @@ -54,8 +54,10 @@ docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libm.so.6 "${CONTAIN docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_files.so.2 "${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib" docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64" +docker cp -L "${ubuntu20image}":/etc/nsswitch.conf "${CONTAINER_ROOT_FOLDER}/etc" docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull rm -rf "$CONTAINER_ROOT_FOLDER" diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 8a4d02a6014..0138a165505 100755 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -54,6 +54,7 @@ FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_ CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" CLICKHOUSE_DB="${CLICKHOUSE_DB:-}" +CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}" for dir in "$DATA_DIR" \ "$ERROR_LOG_DIR" \ @@ -97,6 +98,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL ${CLICKHOUSE_PASSWORD} default + ${CLICKHOUSE_ACCESS_MANAGEMENT} @@ -120,7 +122,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then sleep 1 done - clickhouseclient=( clickhouse-client --multiquery -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" ) + clickhouseclient=( clickhouse-client --multiquery --host "127.0.0.1" -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" ) echo diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index df918928f99..f151ae8fddf 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=21.2.1.* +ARG version=21.3.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 03b7b2fc53a..64be52d8e30 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -47,6 +47,7 @@ RUN apt-get update \ expect \ fakeroot \ git \ + gdb \ gperf \ lld-${LLVM_VERSION} \ llvm-${LLVM_VERSION} \ diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 7555b5591d0..b6fcdd7f7d2 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -107,6 +107,18 @@ 
function start_server fi echo "ClickHouse server pid '$server_pid' started and responded" + + echo " +handle all noprint +handle SIGSEGV stop print +handle SIGBUS stop print +handle SIGABRT stop print +continue +thread apply all backtrace +continue +" > script.gdb + + gdb -batch -command script.gdb -p "$server_pid" & } function clone_root @@ -120,7 +132,7 @@ function clone_root git checkout FETCH_HEAD echo 'Clonned merge head' else - git fetch + git fetch origin "+refs/pull/$PULL_REQUEST_NUMBER/head" git checkout "$COMMIT_SHA" echo 'Checked out to commit' fi @@ -163,6 +175,7 @@ function clone_submodules contrib/xz contrib/dragonbox contrib/fast_float + contrib/NuRaft ) git submodule sync @@ -182,6 +195,7 @@ function run_cmake "-DENABLE_EMBEDDED_COMPILER=0" "-DENABLE_THINLTO=0" "-DUSE_UNWIND=1" + "-DENABLE_NURAFT=1" ) # TODO remove this? we don't use ccache anyway. An option would be to download it @@ -251,8 +265,13 @@ function run_tests 00701_rollup 00834_cancel_http_readonly_queries_on_client_close 00911_tautological_compare + + # Hyperscan 00926_multimatch 00929_multi_match_edit_distance + 01681_hyperscan_debug_assertion + + 01176_mysql_client_interactive # requires mysql client 01031_mutations_interpreter_and_context 01053_ssd_dictionary # this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled 01083_expressions_in_engine_arguments @@ -315,11 +334,12 @@ function run_tests # In fasttest, ENABLE_LIBRARIES=0, so rocksdb engine is not enabled by default 01504_rocksdb + 01686_rocksdb # Look at DistributedFilesToInsert, so cannot run in parallel. 01460_DistributedFilesToInsert - 01541_max_memory_usage_for_user + 01541_max_memory_usage_for_user_long # Require python libraries like scipy, pandas and numpy 01322_ttest_scipy @@ -335,9 +355,10 @@ function run_tests # JSON functions 01666_blns + 01674_htm_xml_coarse_parse ) - time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" + (time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" # substr is to remove semicolon after test name readarray -t FAILED_TESTS < <(awk '/\[ FAIL|TIMEOUT|ERROR \]/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt") diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index b036f99e91d..766fec76179 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -21,13 +21,16 @@ function clone git init git remote add origin https://github.com/ClickHouse/ClickHouse - git fetch --depth=100 origin "$SHA_TO_TEST" - git fetch --depth=100 origin master # Used to obtain the list of modified or added tests + + # Network is unreliable. GitHub neither. 
+ for _ in {1..100}; do git fetch --depth=100 origin "$SHA_TO_TEST" && break; sleep 1; done + # Used to obtain the list of modified or added tests + for _ in {1..100}; do git fetch --depth=100 origin master && break; sleep 1; done # If not master, try to fetch pull/.../{head,merge} if [ "$PR_TO_TEST" != "0" ] then - git fetch --depth=100 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*" + for _ in {1..100}; do git fetch --depth=100 origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*" && break; sleep 1; done fi git checkout "$SHA_TO_TEST" @@ -187,7 +190,7 @@ case "$stage" in # Lost connection to the server. This probably means that the server died # with abort. echo "failure" > status.txt - if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*" server.log > description.txt + if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt then echo "Lost connection to server. See the logs." > description.txt fi diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 9b51891ccf5..e0e5e36a3d6 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -58,10 +58,11 @@ RUN dockerd --version; docker --version RUN python3 -m pip install \ PyMySQL \ - aerospike \ + aerospike==4.0.0 \ avro \ cassandra-driver \ - confluent-kafka \ + confluent-kafka==1.5.0 \ + dict2xml \ dicttoxml \ docker \ docker-compose==1.22.0 \ diff --git a/docker/test/integration/runner/compose/docker_compose_cassandra.yml b/docker/test/integration/runner/compose/docker_compose_cassandra.yml index 6567a352027..c5cdfac5ce7 100644 --- a/docker/test/integration/runner/compose/docker_compose_cassandra.yml +++ b/docker/test/integration/runner/compose/docker_compose_cassandra.yml @@ -4,4 +4,4 @@ services: image: cassandra restart: always ports: - - 9043:9042 + - 9043:9042 diff --git a/docker/test/integration/runner/compose/docker_compose_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_hdfs.yml index b8cd7f64273..43dd1aa43d3 100644 --- a/docker/test/integration/runner/compose/docker_compose_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_hdfs.yml @@ -5,6 +5,6 @@ services: hostname: hdfs1 restart: always ports: - - 50075:50075 - - 50070:50070 + - 50075:50075 + - 50070:50070 entrypoint: /etc/bootstrap.sh -d diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index 219d977ffd9..b77542f7e11 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -5,42 +5,42 @@ services: image: zookeeper:3.4.9 hostname: kafka_zookeeper environment: - ZOO_MY_ID: 1 - ZOO_PORT: 2181 - ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888 + ZOO_MY_ID: 1 + ZOO_PORT: 2181 + ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888 security_opt: - - label:disable + - label:disable kafka1: image: confluentinc/cp-kafka:5.2.0 hostname: kafka1 ports: - - "9092:9092" + - "9092:9092" environment: - KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kafka1:19092 - KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:19092 - 
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT - KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE - KAFKA_BROKER_ID: 1 - KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" - KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kafka1:19092 + KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:19092 + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT + KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE + KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" + KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 depends_on: - - kafka_zookeeper + - kafka_zookeeper security_opt: - - label:disable + - label:disable schema-registry: image: confluentinc/cp-schema-registry:5.2.0 hostname: schema-registry ports: - - "8081:8081" + - "8081:8081" environment: - SCHEMA_REGISTRY_HOST_NAME: schema-registry - SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT - SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 + SCHEMA_REGISTRY_HOST_NAME: schema-registry + SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT + SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 depends_on: - - kafka_zookeeper - - kafka1 + - kafka_zookeeper + - kafka1 security_opt: - - label:disable + - label:disable diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml index a74476613f3..e2e15975e22 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml @@ -8,22 +8,22 @@ services: hostname: kerberizedhdfs1 restart: always volumes: - - ${KERBERIZED_HDFS_DIR}/../../hdfs_configs/bootstrap.sh:/etc/bootstrap.sh:ro - - ${KERBERIZED_HDFS_DIR}/secrets:/usr/local/hadoop/etc/hadoop/conf - - ${KERBERIZED_HDFS_DIR}/secrets/krb_long.conf:/etc/krb5.conf:ro + - ${KERBERIZED_HDFS_DIR}/../../hdfs_configs/bootstrap.sh:/etc/bootstrap.sh:ro + - ${KERBERIZED_HDFS_DIR}/secrets:/usr/local/hadoop/etc/hadoop/conf + - ${KERBERIZED_HDFS_DIR}/secrets/krb_long.conf:/etc/krb5.conf:ro ports: - 1006:1006 - 50070:50070 - - 9000:9000 + - 9010:9010 depends_on: - - hdfskerberos + - hdfskerberos entrypoint: /etc/bootstrap.sh -d hdfskerberos: image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG} hostname: hdfskerberos volumes: - - ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab - - ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh - - /dev/urandom:/dev/random + - ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab + - ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh + - /dev/urandom:/dev/random ports: [88, 749] diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml index 6e1e11344bb..64a3ef3e956 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml @@ -6,54 +6,54 @@ services: # restart: always hostname: kafka_kerberized_zookeeper environment: - ZOOKEEPER_SERVER_ID: 1 - ZOOKEEPER_CLIENT_PORT: 2181 - ZOOKEEPER_SERVERS: "kafka_kerberized_zookeeper:2888:3888" - 
KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/zookeeper_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dzookeeper.authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider -Dsun.security.krb5.debug=true" + ZOOKEEPER_SERVER_ID: 1 + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_SERVERS: "kafka_kerberized_zookeeper:2888:3888" + KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/zookeeper_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dzookeeper.authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider -Dsun.security.krb5.debug=true" volumes: - - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets - - /dev/urandom:/dev/random + - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets + - /dev/urandom:/dev/random depends_on: - - kafka_kerberos + - kafka_kerberos security_opt: - - label:disable + - label:disable kerberized_kafka1: image: confluentinc/cp-kafka:5.2.0 # restart: always hostname: kerberized_kafka1 ports: - - "9092:9092" - - "9093:9093" + - "9092:9092" + - "9093:9093" environment: - KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://:9093 - KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:9093 - # KAFKA_LISTENERS: INSIDE://kerberized_kafka1:9092,OUTSIDE://kerberized_kafka1:19092 - # KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kerberized_kafka1:19092 - KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: GSSAPI - KAFKA_SASL_ENABLED_MECHANISMS: GSSAPI - KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: OUTSIDE:SASL_PLAINTEXT,UNSECURED_OUTSIDE:PLAINTEXT,UNSECURED_INSIDE:PLAINTEXT, - KAFKA_INTER_BROKER_LISTENER_NAME: OUTSIDE - KAFKA_BROKER_ID: 1 - KAFKA_ZOOKEEPER_CONNECT: "kafka_kerberized_zookeeper:2181" - KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 - KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true" + KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://:9093 + KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:9093 + # KAFKA_LISTENERS: INSIDE://kerberized_kafka1:9092,OUTSIDE://kerberized_kafka1:19092 + # KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kerberized_kafka1:19092 + KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: GSSAPI + KAFKA_SASL_ENABLED_MECHANISMS: GSSAPI + KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: OUTSIDE:SASL_PLAINTEXT,UNSECURED_OUTSIDE:PLAINTEXT,UNSECURED_INSIDE:PLAINTEXT, + KAFKA_INTER_BROKER_LISTENER_NAME: OUTSIDE + KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: "kafka_kerberized_zookeeper:2181" + KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true" volumes: - - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets - - /dev/urandom:/dev/random + - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets + - /dev/urandom:/dev/random depends_on: - - 
kafka_kerberized_zookeeper - - kafka_kerberos + - kafka_kerberized_zookeeper + - kafka_kerberos security_opt: - - label:disable + - label:disable kafka_kerberos: image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} hostname: kafka_kerberos volumes: - - ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab - - ${KERBERIZED_KAFKA_DIR}/../../kerberos_image_config.sh:/config.sh - - /dev/urandom:/dev/random + - ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab + - ${KERBERIZED_KAFKA_DIR}/../../kerberos_image_config.sh:/config.sh + - /dev/urandom:/dev/random ports: [88, 749] diff --git a/docker/test/integration/runner/compose/docker_compose_mongo.yml b/docker/test/integration/runner/compose/docker_compose_mongo.yml index 8c54544ed88..6c98fde2303 100644 --- a/docker/test/integration/runner/compose/docker_compose_mongo.yml +++ b/docker/test/integration/runner/compose/docker_compose_mongo.yml @@ -7,5 +7,5 @@ services: MONGO_INITDB_ROOT_USERNAME: root MONGO_INITDB_ROOT_PASSWORD: clickhouse ports: - - 27018:27017 + - 27018:27017 command: --profile=2 --verbose diff --git a/docker/test/integration/runner/compose/docker_compose_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql.yml index 90daf8a4238..5b15d517f37 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql.yml @@ -6,5 +6,5 @@ services: environment: MYSQL_ROOT_PASSWORD: clickhouse ports: - - 3308:3306 + - 3308:3306 command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml index e7d762203ee..5aa13ba91c7 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_5_7_for_materialize_mysql.yml @@ -6,5 +6,9 @@ services: environment: MYSQL_ROOT_PASSWORD: clickhouse ports: - - 3308:3306 - command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency + - 3308:3306 + command: --server_id=100 --log-bin='mysql-bin-1.log' + --default-time-zone='+3:00' + --gtid-mode="ON" + --enforce-gtid-consistency + --log-error-verbosity=3 diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml index 918a2b5f80f..7c8a930c84e 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_8_0_for_materialize_mysql.yml @@ -6,5 +6,10 @@ services: environment: MYSQL_ROOT_PASSWORD: clickhouse ports: - - 33308:3306 - command: --server_id=100 --log-bin='mysql-bin-1.log' --default_authentication_plugin='mysql_native_password' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency + - 33308:3306 + command: --server_id=100 --log-bin='mysql-bin-1.log' + --default_authentication_plugin='mysql_native_password' + --default-time-zone='+3:00' + --gtid-mode="ON" + --enforce-gtid-consistency + --log-error-verbosity=3 diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_client.yml b/docker/test/integration/runner/compose/docker_compose_mysql_client.yml index 
802151c4d7b..5e4565d64c3 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_client.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_client.yml @@ -7,7 +7,7 @@ services: MYSQL_ALLOW_EMPTY_PASSWORD: 1 command: --federated --socket /var/run/mysqld/mysqld.sock healthcheck: - test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"] + test: ["CMD", "mysqladmin", "ping", "-h", "localhost"] interval: 1s timeout: 2s retries: 100 diff --git a/docker/test/integration/runner/compose/docker_compose_postgesql.yml b/docker/test/integration/runner/compose/docker_compose_postgesql.yml index 984f5f97384..90764188ddd 100644 --- a/docker/test/integration/runner/compose/docker_compose_postgesql.yml +++ b/docker/test/integration/runner/compose/docker_compose_postgesql.yml @@ -11,4 +11,4 @@ services: ports: - "5433:5433" environment: - POSTGRES_HOST_AUTH_METHOD: "trust" \ No newline at end of file + POSTGRES_HOST_AUTH_METHOD: "trust" diff --git a/docker/test/integration/runner/compose/docker_compose_postgres.yml b/docker/test/integration/runner/compose/docker_compose_postgres.yml index fff4fb1fa42..5657352e1b3 100644 --- a/docker/test/integration/runner/compose/docker_compose_postgres.yml +++ b/docker/test/integration/runner/compose/docker_compose_postgres.yml @@ -6,8 +6,8 @@ services: environment: POSTGRES_PASSWORD: mysecretpassword ports: - - 5432:5432 + - 5432:5432 networks: - default: - aliases: - - postgre-sql.local + default: + aliases: + - postgre-sql.local diff --git a/docker/test/integration/runner/compose/docker_compose_redis.yml b/docker/test/integration/runner/compose/docker_compose_redis.yml index 72df99ec59b..3d834aadaa4 100644 --- a/docker/test/integration/runner/compose/docker_compose_redis.yml +++ b/docker/test/integration/runner/compose/docker_compose_redis.yml @@ -4,5 +4,5 @@ services: image: redis restart: always ports: - - 6380:6379 + - 6380:6379 command: redis-server --requirepass "clickhouse" --databases 32 diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 9a0d8093a55..2b19a5e75a8 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -97,6 +97,7 @@ function configure rm -r right/db ||: rm -r db0/preprocessed_configs ||: rm -r db0/{data,metadata}/system ||: + rm db0/status ||: cp -al db0/ left/db/ cp -al db0/ right/db/ } diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 48479161ef9..f1c5df146aa 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -44,6 +44,7 @@ parser.add_argument('--port', nargs='*', default=[9000], help="Space-separated l parser.add_argument('--runs', type=int, default=1, help='Number of query runs per server.') parser.add_argument('--max-queries', type=int, default=None, help='Test no more than this number of queries, chosen at random.') parser.add_argument('--queries-to-run', nargs='*', type=int, default=None, help='Space-separated list of indexes of queries to test.') +parser.add_argument('--max-query-seconds', type=int, default=10, help='For how many seconds at most a query is allowed to run. 
The script finishes with error if this time is exceeded.') parser.add_argument('--profile-seconds', type=int, default=0, help='For how many seconds to profile a query for which the performance has changed.') parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.') parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.') @@ -323,7 +324,7 @@ for query_index in queries_to_run: server_seconds += elapsed print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{elapsed}') - if elapsed > 10: + if elapsed > args.max_query_seconds: # Stop processing pathologically slow queries, to avoid timing out # the entire test task. This shouldn't really happen, so we don't # need much handling for this case and can just exit. diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index f2fcefd604f..7779f0e9dc2 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -60,4 +60,8 @@ fi # more idiologically correct. read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}" +if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--replicated-database') +fi + clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index b063f8d81f6..2437415d17c 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -3,6 +3,9 @@ FROM yandex/clickhouse-test-base ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz" +RUN echo "deb [trusted=yes] http://repo.mysql.com/apt/ubuntu/ bionic mysql-5.7" >> /etc/apt/sources.list \ + && apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 8C718D3B5072E1F5 + RUN apt-get update -y \ && env DEBIAN_FRONTEND=noninteractive \ apt-get install --yes --no-install-recommends \ @@ -13,6 +16,7 @@ RUN apt-get update -y \ ncdu \ netcat-openbsd \ openssl \ + protobuf-compiler \ python3 \ python3-lxml \ python3-requests \ @@ -23,7 +27,8 @@ RUN apt-get update -y \ telnet \ tree \ unixodbc \ - wget + wget \ + mysql-client=5.7* RUN pip3 install numpy scipy pandas diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index d9a03f84726..d078f3739fd 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -53,14 +53,19 @@ function run_tests() if [ "$NUM_TRIES" -gt "1" ]; then ADDITIONAL_OPTIONS+=('--skip') ADDITIONAL_OPTIONS+=('00000_no_tests_to_skip') + ADDITIONAL_OPTIONS+=('--jobs') + ADDITIONAL_OPTIONS+=('4') fi - for _ in $(seq 1 "$NUM_TRIES"); do - clickhouse-test --testname --shard --zookeeper --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee -a test_output/test_result.txt - if [ "${PIPESTATUS[0]}" -ne "0" ]; then - break; - fi - done + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--replicated-database') + fi + + clickhouse-test --testname --shard --zookeeper --hung-check --print-time \ + --test-runs "$NUM_TRIES" \ + "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ + | ts '%Y-%m-%d %H:%M:%S' \ + | tee -a test_output/test_result.txt } export -f run_tests diff --git a/docker/test/stateless_pytest/Dockerfile 
b/docker/test/stateless_pytest/Dockerfile index 4d0274143d6..58846f90fa7 100644 --- a/docker/test/stateless_pytest/Dockerfile +++ b/docker/test/stateless_pytest/Dockerfile @@ -5,7 +5,10 @@ RUN apt-get update -y && \ apt-get install -y --no-install-recommends \ python3-pip \ python3-setuptools \ - python3-wheel + python3-wheel \ + brotli \ + netcat-openbsd \ + zstd RUN python3 -m pip install \ wheel \ @@ -15,7 +18,10 @@ RUN python3 -m pip install \ pytest-randomly \ pytest-rerunfailures \ pytest-timeout \ - pytest-xdist + pytest-xdist \ + pandas \ + numpy \ + scipy CMD dpkg -i package_folder/clickhouse-common-static_*.deb; \ dpkg -i package_folder/clickhouse-common-static-dbg_*.deb; \ diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 9da2f3d3ada..dc1e4db4477 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -8,16 +8,23 @@ dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb dpkg -i package_folder/clickhouse-test_*.deb +function configure() +{ + # install test configs + /usr/share/clickhouse-test/config/install.sh + + # for clickhouse-server (via service) + echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment + # for clickhouse-client + export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000' + + # since we run clickhouse from root + sudo chown root: /var/lib/clickhouse +} + function stop() { - timeout 120 service clickhouse-server stop - - # Wait for process to disappear from processlist and also try to kill zombies. - while kill -9 "$(pidof clickhouse-server)" - do - echo "Killed clickhouse-server" - sleep 0.5 - done + clickhouse stop } function start() @@ -33,19 +40,26 @@ function start() tail -n1000 /var/log/clickhouse-server/clickhouse-server.log break fi - timeout 120 service clickhouse-server start + # use root to match with current uid + clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>/var/log/clickhouse-server/stderr.log sleep 0.5 counter=$((counter + 1)) done + + echo " +handle all noprint +handle SIGSEGV stop print +handle SIGBUS stop print +handle SIGABRT stop print +continue +thread apply all backtrace +continue +" > script.gdb + + gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" & } -# install test configs -/usr/share/clickhouse-test/config/install.sh - -# for clickhouse-server (via service) -echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment -# for clickhouse-client -export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000' +configure start @@ -64,7 +78,7 @@ clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" clickhouse-client --query "SHOW TABLES FROM test" -./stress --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" +./stress --hung-check --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" && echo "OK" > /test_output/script_exit_code.txt || echo "FAIL" > /test_output/script_exit_code.txt stop start diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 458f78fcdb4..841556cf090 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -1,8 +1,9 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- from multiprocessing import cpu_count -from subprocess import Popen, check_call +from 
subprocess import Popen, call, STDOUT import os +import sys import shutil import argparse import logging @@ -22,12 +23,15 @@ def get_options(i): if 0 < i: options += " --order=random" - if i % 2 == 1: + if i % 3 == 1: options += " --db-engine=Ordinary" + if i % 3 == 2: + options += ''' --db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i) + # If database name is not specified, new database is created for each functional test. # Run some threads with one database for all tests. - if i % 3 == 1: + if i % 2 == 1: options += " --database=test_{}".format(i) if i == 13: @@ -64,7 +68,8 @@ if __name__ == "__main__": parser.add_argument("--server-log-folder", default='/var/log/clickhouse-server') parser.add_argument("--output-folder") parser.add_argument("--global-time-limit", type=int, default=3600) - parser.add_argument("--num-parallel", default=cpu_count()); + parser.add_argument("--num-parallel", default=cpu_count()) + parser.add_argument('--hung-check', action='store_true', default=False) args = parser.parse_args() func_pipes = [] @@ -81,4 +86,13 @@ if __name__ == "__main__": logging.info("Finished %s from %s processes", len(retcodes), len(func_pipes)) time.sleep(5) + logging.info("All processes finished") + if args.hung_check: + logging.info("Checking if some queries hung") + cmd = "{} {} {}".format(args.test_cmd, "--hung-check", "00001_select_1") + res = call(cmd, shell=True, stderr=STDOUT) + if res != 0: + logging.info("Hung check failed with exit code {}".format(res)) + sys.exit(1) + logging.info("Stress test finished") diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 7047007d2fc..e70f9e05679 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -1,12 +1,23 @@ # docker build -t yandex/clickhouse-style-test . FROM ubuntu:20.04 -RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip python3-pytest && pip3 install codespell +RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ + shellcheck \ + libxml2-utils \ + git \ + python3-pip \ + pylint \ + yamllint \ + && pip3 install codespell +# For |& syntax +SHELL ["bash", "-c"] + CMD cd /ClickHouse/utils/check-style && \ - ./check-style -n | tee /test_output/style_output.txt && \ - ./check-typos | tee /test_output/typos_output.txt && \ - ./check-whitespaces -n | tee /test_output/whitespaces_output.txt && \ - ./check-duplicate-includes.sh | tee /test_output/duplicate_output.txt && \ - ./shellcheck-run.sh | tee /test_output/shellcheck_output.txt + ./check-style -n |& tee /test_output/style_output.txt && \ + ./check-typos |& tee /test_output/typos_output.txt && \ + ./check-whitespaces -n |& tee /test_output/whitespaces_output.txt && \ + ./check-duplicate-includes.sh |& tee /test_output/duplicate_output.txt && \ + ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt && \ + true diff --git a/docs/_description_templates/template-data-type.md b/docs/_description_templates/template-data-type.md new file mode 100644 index 00000000000..edb6586ee7d --- /dev/null +++ b/docs/_description_templates/template-data-type.md @@ -0,0 +1,29 @@ +--- +toc_priority: +toc_title: +--- + +# data_type_name {#data_type-name} + +Description. + +**Parameters** (Optional) + +- `x` — Description. [Type name](relative/path/to/type/dscr.md#type). +- `y` — Description. [Type name](relative/path/to/type/dscr.md#type). 
+ +**Examples** + +```sql + +``` + +## Additional Info {#additional-info} (Optional) + +The name of an additional section can be any, for example, **Usage**. + +**See Also** (Optional) + +- [link](#) + +[Original article](https://clickhouse.tech/docs/en/data_types//) diff --git a/docs/_description_templates/template-function.md b/docs/_description_templates/template-function.md index b69d7ed5309..a0074a76ef6 100644 --- a/docs/_description_templates/template-function.md +++ b/docs/_description_templates/template-function.md @@ -12,16 +12,20 @@ Alias: ``. (Optional) More text (Optional). -**Parameters** (Optional) +**Arguments** (Optional) - `x` — Description. [Type name](relative/path/to/type/dscr.md#type). - `y` — Description. [Type name](relative/path/to/type/dscr.md#type). +**Parameters** (Optional, only for parametric aggregate functions) + +- `z` — Description. [Type name](relative/path/to/type/dscr.md#type). + **Returned value(s)** -- Returned values list. +- Returned values list. -Type: [Type](relative/path/to/type/dscr.md#type). +Type: [Type name](relative/path/to/type/dscr.md#type). **Example** diff --git a/docs/_description_templates/template-system-table.md b/docs/_description_templates/template-system-table.md index 3fdf9788d79..f2decc4bb6d 100644 --- a/docs/_description_templates/template-system-table.md +++ b/docs/_description_templates/template-system-table.md @@ -8,10 +8,14 @@ Columns: **Example** +Query: + ``` sql SELECT * FROM system.table_name ``` +Result: + ``` text Some output. It shouldn't be too long. ``` diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index 60365ad744a..e0b1be710f1 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -40,7 +40,7 @@ $ cd ClickHouse ``` bash $ mkdir build $ cd build -$ cmake ..-DCMAKE_C_COMPILER=`brew --prefix llvm`/bin/clang -DCMAKE_CXX_COMPILER=`brew --prefix llvm`/bin/clang++ -DCMAKE_PREFIX_PATH=`brew --prefix llvm` +$ cmake .. -DCMAKE_C_COMPILER=`brew --prefix llvm`/bin/clang -DCMAKE_CXX_COMPILER=`brew --prefix llvm`/bin/clang++ -DCMAKE_PREFIX_PATH=`brew --prefix llvm` $ ninja $ cd .. ``` diff --git a/docs/en/engines/database-engines/materialize-mysql.md b/docs/en/engines/database-engines/materialize-mysql.md index 89fe9304c4c..2e361cc82f0 100644 --- a/docs/en/engines/database-engines/materialize-mysql.md +++ b/docs/en/engines/database-engines/materialize-mysql.md @@ -93,6 +93,7 @@ ClickHouse has only one physical order, which is determined by `ORDER BY` clause - Cascade `UPDATE/DELETE` queries are not supported by the `MaterializeMySQL` engine. - Replication can be easily broken. - Manual operations on database and tables are forbidden. +- `MaterializeMySQL` is influenced by [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged in the corresponding table in the `MaterializeMySQL` database when a table in the MySQL server changes. 
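A minimal, hedged illustration of the limitation above: `optimize_on_insert` is a session-level setting, so its effect on how `MaterializeMySQL` merges incoming data can be observed by toggling it before replicated rows arrive (the value shown is only for illustration, not a recommendation).

``` sql
-- Toggle merging of inserted blocks for the current session; with the setting
-- disabled, replicated rows are written as-is and merged later by background merges.
SET optimize_on_insert = 0;
```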
## Examples of Use {#examples-of-use} @@ -156,4 +157,4 @@ SELECT * FROM mysql.test; └───┴─────┴──────┘ ``` -[Original article](https://clickhouse.tech/docs/en/database_engines/materialize-mysql/) +[Original article](https://clickhouse.tech/docs/en/engines/database-engines/materialize-mysql/) diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md index 857e148277c..6e864751cc3 100644 --- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md @@ -7,8 +7,6 @@ toc_title: EmbeddedRocksDB This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/). -`EmbeddedRocksDB` lets you: - ## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table} ``` sql @@ -23,6 +21,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Required parameters: - `primary_key_name` – any column name in the column list. +- `primary key` must be specified, it supports only one column in the primary key. The primary key will be serialized in binary as a `rocksdb key`. +- columns other than the primary key will be serialized in binary as `rocksdb` value in corresponding order. +- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from `rocksdb`. Example: @@ -38,8 +39,4 @@ ENGINE = EmbeddedRocksDB PRIMARY KEY key ``` -## Description {#description} - -- `primary key` must be specified, it only supports one column in primary key. The primary key will serialized in binary as rocksdb key. -- columns other than the primary key will be serialized in binary as rocksdb value in corresponding order. -- queries with key `equals` or `in` filtering will be optimized to multi keys lookup from rocksdb. +[Original article](https://clickhouse.tech/docs/en/operations/table_engines/embedded-rocksdb/) diff --git a/docs/en/engines/table-engines/integrations/index.md b/docs/en/engines/table-engines/integrations/index.md index cf3e36c2f48..288c9c3cd56 100644 --- a/docs/en/engines/table-engines/integrations/index.md +++ b/docs/en/engines/table-engines/integrations/index.md @@ -12,6 +12,9 @@ List of supported integrations: - [ODBC](../../../engines/table-engines/integrations/odbc.md) - [JDBC](../../../engines/table-engines/integrations/jdbc.md) - [MySQL](../../../engines/table-engines/integrations/mysql.md) +- [MongoDB](../../../engines/table-engines/integrations/mongodb.md) - [HDFS](../../../engines/table-engines/integrations/hdfs.md) - [S3](../../../engines/table-engines/integrations/s3.md) - [Kafka](../../../engines/table-engines/integrations/kafka.md) +- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md) +- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md new file mode 100644 index 00000000000..e648a13b5e0 --- /dev/null +++ b/docs/en/engines/table-engines/integrations/mongodb.md @@ -0,0 +1,57 @@ +--- +toc_priority: 7 +toc_title: MongoDB +--- + +# MongoDB {#mongodb} + +MongoDB engine is read-only table engine which allows to read data (`SELECT` queries) from remote MongoDB collection. Engine supports only non-nested data types. `INSERT` queries are not supported. + +## Creating a Table {#creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name +( + name1 [type1], + name2 [type2], + ... 
+) ENGINE = MongoDB(host:port, database, collection, user, password); +``` + +**Engine Parameters** + +- `host:port` — MongoDB server address. + +- `database` — Remote database name. + +- `collection` — Remote collection name. + +- `user` — MongoDB user. + +- `password` — User password. + +## Usage Example {#usage-example} + +Table in ClickHouse which allows to read data from MongoDB collection: + +``` text +CREATE TABLE mongo_table +( + key UInt64, + data String +) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse'); +``` + +Query: + +``` sql +SELECT COUNT() FROM mongo_table; +``` + +``` text +┌─count()─┐ +│ 4 │ +└─────────┘ +``` + +[Original article](https://clickhouse.tech/docs/en/operations/table_engines/integrations/mongodb/) diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index b0901ee6f6e..4a0550275ca 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -59,10 +59,26 @@ Optional parameters: - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` -Required configuration: +Also format settings can be added along with rabbitmq-related settings. + +Example: + +``` sql + CREATE TABLE queue ( + key UInt64, + value UInt64, + date DateTime + ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', + rabbitmq_exchange_name = 'exchange1', + rabbitmq_format = 'JSONEachRow', + rabbitmq_num_consumers = 5, + date_time_input_format = 'best_effort'; +``` The RabbitMQ server configuration should be added using the ClickHouse config file. +Required configuration: + ``` xml root @@ -70,16 +86,12 @@ The RabbitMQ server configuration should be added using the ClickHouse config fi ``` -Example: +Additional configuration: -``` sql - CREATE TABLE queue ( - key UInt64, - value UInt64 - ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', - rabbitmq_exchange_name = 'exchange1', - rabbitmq_format = 'JSONEachRow', - rabbitmq_num_consumers = 5; +``` xml + + clickhouse + ``` ## Description {#description} @@ -105,6 +117,7 @@ Exchange type options: - `consistent_hash` - Data is evenly distributed between all bound tables (where the exchange name is the same). Note that this exchange type must be enabled with RabbitMQ plugin: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`. Setting `rabbitmq_queue_base` may be used for the following cases: + - to let different tables share queues, so that multiple consumers could be registered for the same queues, which makes a better performance. If using `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings, the exact match of queues is achieved in case these parameters are the same. - to be able to restore reading from certain durable queues when not all messages were successfully consumed. To resume consumption from one specific queue - set its name in `rabbitmq_queue_base` setting and do not specify `rabbitmq_num_consumers` and `rabbitmq_num_queues` (defaults to 1). To resume consumption from all queues, which were declared for a specific table - just specify the same settings: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. By default, queue names will be unique to tables. - to reuse queues as they are declared durable and not auto-deleted. (Can be deleted via any of RabbitMQ CLI tools.) 
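A hedged sketch of the first `rabbitmq_queue_base` case above. The table names and the queue base value are invented for illustration; only the settings themselves come from the list in this section. Two tables declare the same `rabbitmq_queue_base`, `rabbitmq_num_consumers` and `rabbitmq_num_queues`, so their consumers attach to the same queues:

``` sql
-- Both tables read from the same set of queues, spreading consumption across them.
CREATE TABLE queue_reader_1 (key UInt64, value UInt64)
ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672',
                           rabbitmq_exchange_name = 'exchange1',
                           rabbitmq_format = 'JSONEachRow',
                           rabbitmq_queue_base = 'shared_queue',
                           rabbitmq_num_consumers = 2,
                           rabbitmq_num_queues = 2;

CREATE TABLE queue_reader_2 (key UInt64, value UInt64)
ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672',
                           rabbitmq_exchange_name = 'exchange1',
                           rabbitmq_format = 'JSONEachRow',
                           rabbitmq_queue_base = 'shared_queue',
                           rabbitmq_num_consumers = 2,
                           rabbitmq_num_queues = 2;
```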
diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index d8cceb4d511..5858a0803e6 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -136,8 +136,7 @@ The following settings can be specified in configuration file for given endpoint - `access_key_id` and `secret_access_key` — Optional. Specifies credentials to use with given endpoint. - `use_environment_credentials` — Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint. - `header` — Optional, can be speficied multiple times. Adds specified HTTP header to a request to given endpoint. - -This configuration also applies to S3 disks in `MergeTree` table engine family. +- `server_side_encryption_customer_key_base64` — Optional. If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Example: @@ -149,6 +148,7 @@ Example: + ``` diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 084d05ec0a0..753859b46d2 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -45,7 +45,10 @@ ORDER BY expr [PARTITION BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] -[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...] +[TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ] + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ] [SETTINGS name=value, ...] ``` @@ -80,7 +83,7 @@ For a description of parameters, see the [CREATE query description](../../../sql Expression must have one `Date` or `DateTime` column as a result. Example: `TTL date + INTERVAL 1 DAY` - Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`). Default type of the rule is removal (`DELETE`). List of multiple rules can specified, but there should be no more than one `DELETE` rule. + Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. Default type of the rule is removal (`DELETE`). List of multiple rules can specified, but there should be no more than one `DELETE` rule. For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl) @@ -101,7 +104,8 @@ For a description of parameters, see the [CREATE query description](../../../sql - `max_parts_in_total` — Maximum number of parts in all partitions. - `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when table is created overrides the global value for this setting. 
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when table is created overrides the global value for this setting. - + - `max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify the setting [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) in the global settings. + **Example of Sections Setting** ``` sql @@ -455,18 +459,28 @@ ALTER TABLE example_table Table can have an expression for removal of expired rows, and multiple expressions for automatic move of parts between [disks or volumes](#table_engine-mergetree-multiple-volumes). When rows in the table expire, ClickHouse deletes all corresponding rows. For parts moving feature, all rows of a part must satisfy the movement expression criteria. ``` sql -TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ... +TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ... + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ``` Type of TTL rule may follow each TTL expression. It affects an action which is to be done once the expression is satisfied (reaches current time): - `DELETE` - delete expired rows (default action); - `TO DISK 'aaa'` - move part to the disk `aaa`; -- `TO VOLUME 'bbb'` - move part to the disk `bbb`. +- `TO VOLUME 'bbb'` - move part to the volume `bbb`; +- `GROUP BY` - aggregate expired rows. -Examples: +With the `WHERE` clause you may specify which of the expired rows to delete or aggregate (it cannot be applied to moves). -Creating a table with TTL +The `GROUP BY` expression must be a prefix of the table primary key. + +If a column is not part of the `GROUP BY` expression and is not set explicitly in the `SET` clause, the resulting row contains an arbitrary value from the grouped rows (as if the aggregate function `any` were applied to it). + +**Examples** + +Creating a table with TTL: ``` sql CREATE TABLE example_table @@ -482,13 +496,43 @@ TTL d + INTERVAL 1 MONTH [DELETE], d + INTERVAL 2 WEEK TO DISK 'bbb'; ``` -Altering TTL of the table +Altering TTL of the table: ``` sql ALTER TABLE example_table MODIFY TTL d + INTERVAL 1 DAY; ``` +Creating a table where the rows expire after one month. The expired rows whose dates fall on a Monday are deleted: + +``` sql +CREATE TABLE table_with_where +( + d DateTime, + a Int +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(d) +ORDER BY d +TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1; +``` + +Creating a table where expired rows are aggregated. In the resulting rows, `x` contains the maximum value across the grouped rows, `y` — the minimum value, and `d` — any value from the grouped rows. + +``` sql +CREATE TABLE table_for_aggregation +( + d DateTime, + k1 Int, + k2 Int, + x Int, + y Int +) +ENGINE = MergeTree +ORDER BY k1, k2 +TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); +``` + **Removing Data** Data with an expired TTL is removed when ClickHouse merges data parts.
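A minimal sketch, assuming the `table_with_where` table from the example above: because removal only happens on merges, an off-schedule merge can be forced with `OPTIMIZE ... FINAL` when expired rows must be gone before the next read (at the cost of an extra merge).

``` sql
-- Force a merge so that rows with an expired TTL are dropped, then query.
OPTIMIZE TABLE table_with_where FINAL;
SELECT count() FROM table_with_where;
```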
@@ -671,6 +715,7 @@ Configuration markup: https://storage.yandexcloud.net/my-bucket/root-path/ your_access_key_id your_secret_access_key + your_base64_encoded_customer_key http://proxy1 http://proxy2 @@ -706,7 +751,8 @@ Optional parameters: - `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`. - `cache_enabled` — Allows to cache mark and index files on local FS. Default value is `true`. - `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks//cache/`. -- `skip_access_check` — If true disk access checks will not be performed on disk start-up. Default value is `false`. +- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. +- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. S3 disk can be configured as `main` or `cold` storage: diff --git a/docs/en/faq/operations/delete-old-data.md b/docs/en/faq/operations/delete-old-data.md index 5addc455602..fdf1f1f290e 100644 --- a/docs/en/faq/operations/delete-old-data.md +++ b/docs/en/faq/operations/delete-old-data.md @@ -39,4 +39,4 @@ More details on [manipulating partitions](../../sql-reference/statements/alter/p It’s rather radical to drop all data from a table, but in some cases it might be exactly what you need. -More details on [table truncation](../../sql-reference/statements/alter/partition.md#alter_drop-partition). +More details on [table truncation](../../sql-reference/statements/truncate.md). diff --git a/docs/en/getting-started/tutorial.md b/docs/en/getting-started/tutorial.md index 64363c963c5..fe697972dff 100644 --- a/docs/en/getting-started/tutorial.md +++ b/docs/en/getting-started/tutorial.md @@ -644,7 +644,7 @@ If there are no replicas at the moment on replicated table creation, a new first ``` sql CREATE TABLE tutorial.hits_replica (...) -ENGINE = ReplcatedMergeTree( +ENGINE = ReplicatedMergeTree( '/clickhouse_perftest/tables/{shard}/hits', '{replica}' ) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 11291d61300..33bf90a8b52 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -31,8 +31,8 @@ The supported formats are: | [JSONCompactString](#jsoncompactstring) | ✗ | ✔ | | [JSONEachRow](#jsoneachrow) | ✔ | ✔ | | [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | -| [JSONStringEachRow](#jsonstringeachrow) | ✔ | ✔ | -| [JSONStringEachRowWithProgress](#jsonstringeachrowwithprogress) | ✗ | ✔ | +| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | +| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | | [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | | [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | | [JSONCompactStringEachRow](#jsoncompactstringeachrow) | ✔ | ✔ | @@ -612,7 +612,7 @@ Example: ``` ## JSONEachRow {#jsoneachrow} -## JSONStringEachRow {#jsonstringeachrow} +## JSONStringsEachRow {#jsonstringseachrow} ## JSONCompactEachRow {#jsoncompacteachrow} ## JSONCompactStringEachRow {#jsoncompactstringeachrow} @@ -627,9 +627,9 @@ When using these formats, ClickHouse outputs rows as separated, newline-delimite When inserting the data, you should provide a separate JSON value for each row. 
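A hedged sketch of such an insert. The table `test_json` and its columns are made up for illustration; only the `FORMAT JSONEachRow` clause itself comes from this section.

``` sql
-- Hypothetical table used only to show row-per-JSON-value input and output.
CREATE TABLE test_json (x UInt32, s String) ENGINE = Memory;

-- Each row is provided as a separate JSON value.
INSERT INTO test_json FORMAT JSONEachRow {"x":1,"s":"hello"} {"x":2,"s":"world"}

SELECT * FROM test_json FORMAT JSONEachRow
```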
## JSONEachRowWithProgress {#jsoneachrowwithprogress} -## JSONStringEachRowWithProgress {#jsonstringeachrowwithprogress} +## JSONStringsEachRowWithProgress {#jsonstringseachrowwithprogress} -Differs from `JSONEachRow`/`JSONStringEachRow` in that ClickHouse will also yield progress information as JSON values. +Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yield progress information as JSON values. ```json {"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}} diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index 2684e6fdd3a..454d856f779 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -8,118 +8,120 @@ toc_title: Adopters !!! warning "Disclaimer" The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. We’d appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you won’t have any NDA issues by doing so. Providing updates with publications from other companies is also useful. -| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference | -|------------------------------------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 2gis | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | -| Admiral | Martech | Engagement Management | — | — | [Webinar Slides, June 2020](https://altinity.com/presentations/2020/06/16/big-data-in-real-time-how-clickhouse-powers-admirals-visitor-relationships-for-publishers) | -| Alibaba Cloud | Cloud | Managed Service | — | — | [Official Website](https://help.aliyun.com/product/144466.html) | -| Aloha Browser | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) | -| Amadeus | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | -| Appsflyer | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | -| ArenaData | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) | -| Avito | Classifieds | Monitoring | — | — | [Meetup, April 2020](https://www.youtube.com/watch?v=n1tm4j4W8ZQ) | -| Badoo | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) | -| Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | +| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size\* | Reference | 
+|---------|----------|---------|--------------|------------------------------------------------------------------------------|-----------| +| 2gis | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | +| Admiral | Martech | Engagement Management | — | — | [Webinar Slides, June 2020](https://altinity.com/presentations/2020/06/16/big-data-in-real-time-how-clickhouse-powers-admirals-visitor-relationships-for-publishers) | +| Alibaba Cloud | Cloud | Managed Service | — | — | [Official Website](https://help.aliyun.com/product/144466.html) | +| Aloha Browser | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) | +| Amadeus | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | +| Appsflyer | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | +| ArenaData | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) | +| Avito | Classifieds | Monitoring | — | — | [Meetup, April 2020](https://www.youtube.com/watch?v=n1tm4j4W8ZQ) | +| Badoo | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) | +| Benocs | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | | BIGO | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) | -| Bloomberg | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | -| Bloxy | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | -| Bytedance | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) | +| Bloomberg | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | +| Bloxy | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | +| Bytedance | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) | | CardsMobile | Finance | Analytics | — | — | [VC.ru](https://vc.ru/s/cardsmobile/143449-rukovoditel-gruppy-analiza-dannyh) | -| CARTO | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | -| CERN | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | -| Cisco | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | -| Citadel Securities | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | -| Citymobil | 
Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | -| Cloudflare | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | -| Comcast | Media | CDN Traffic Analysis | — | — | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg) | -| ContentSquare | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | -| Corunet | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | -| CraiditX 氪信 | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | -| Crazypanda | Games | | — | — | Live session on ClickHouse meetup | -| Criteo | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | -| Dataliance for China Telecom | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | -| Deutsche Bank | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | -| Deeplay | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) | -| Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | -| Ecwid | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) | -| eBay | E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) | -| Exness | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | -| FastNetMon | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) | -| Flipkart | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) | -| FunCorp | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | -| Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | -| Genotek | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) | -| HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | -| ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 
2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | -| Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | -| Infovista | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | -| InnoGames | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | -| Instana | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) | -| Integros | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | -| Ippon Technologies | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) | -| Ivi | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) | -| Jinshuju 金数据 | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | -| Kodiak Data | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | -| Kontur | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | -| Kuaishou | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.tech/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) | -| Lawrence Berkeley National Laboratory | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | -| LifeStreet | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | -| Mail.ru Cloud Solutions | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | -| Marilyn | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) | -| Mello | Marketing | Analytics | 1 server | — | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) | -| MessageBird | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | -| MindsDB | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |x -| MUX | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) | -| MGID | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) | -| NOC Project | Network Monitoring | Analytics | Main Product | — | [Official 
Website](https://getnoc.com/features/big-data/) | -| Nuna Inc. | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | -| OneAPM | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | -| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | -| Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) | -| Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | -| PostHog | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) | -| Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) | -| Pragma Innovation | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | -| QINGCLOUD | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | -| Qrator | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | -| Raiffeisenbank | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) | -| Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | -| Retell | Speech synthesis | Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) | -| Rspamd | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) | -| RuSIEM | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) | -| S7 Airlines | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | -| scireum GmbH | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | -| Segment | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) | -| SEMrush | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | -| Sentry | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | -| seo.do | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | -| SGK | Goverment Social Security | Analytics | — 
| — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | -| Sina | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | -| SMI2 | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | -| Splunk | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | -| Spotify | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | -| Staffcop | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) | -| Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) | -| Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) | -| Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | -| Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | -| Tencent Music Entertainment (TME) | BigData | Data processing | — | — | [Blog in Chinese, June 2020](https://cloud.tencent.com/developer/article/1637840) | -| Traffic Stars | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | -| Uber | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) | -| VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | -| Walmart Labs | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) | -| Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | -| Wisebits | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | -| Workato | Automation Software | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=334) | -| Xiaoxin Tech | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | -| Ximalaya | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) | -| Yandex Cloud | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | -| Yandex DataLens | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | -| Yandex Market | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | -| Yandex Metrica | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 
servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) | -| ЦВТ | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | -| МКБ | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | -| ЦФТ | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) | -| kakaocorp | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) | +| CARTO | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | +| CERN | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | +| Cisco | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | +| Citadel Securities | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | +| Citymobil | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | +| Cloudflare | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | +| Comcast | Media | CDN Traffic Analysis | — | — | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg) | +| ContentSquare | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | +| Corunet | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | +| CraiditX 氪信 | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | +| Crazypanda | Games | | — | — | Live session on ClickHouse meetup | +| Criteo | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | +| Dataliance for China Telecom | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | +| Deutsche Bank | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | +| Deeplay | Gaming Analytics | — | — | — | [Job advertisement, 2020](https://career.habr.com/vacancies/1000062568) | +| Diva-e | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | +| Ecwid | E-commerce SaaS | Metrics, Logging | — | — | [Slides in Russian, April 2019](https://nastachku.ru/var/files/1/presentation/backend/2_Backend_6.pdf) | +| eBay 
| E-commerce | Logs, Metrics and Events | — | — | [Official website, Sep 2020](https://tech.ebayinc.com/engineering/ou-online-analytical-processing/) | +| Exness | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | +| FastNetMon | DDoS Protection | Main Product | | — | [Official website](https://fastnetmon.com/docs-fnm-advanced/fastnetmon-advanced-traffic-persistency/) | +| Flipkart | e-Commerce | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=239) | +| FunCorp | Games | | — | 14 bn records/day as of Jan 2021 | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | +| Geniee | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | +| Genotek | Bioinformatics | Main product | — | — | [Video, August 2020](https://youtu.be/v3KyZbz9lEE) | +| HUYA | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | +| ICA | FinTech | Risk Management | — | — | [Blog Post in English, Sep 2020](https://altinity.com/blog/clickhouse-vs-redshift-performance-for-fintech-risk-management?utm_campaign=ClickHouse%20vs%20RedShift&utm_content=143520807&utm_medium=social&utm_source=twitter&hss_channel=tw-3894792263) | +| Idealista | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | +| Infovista | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | +| InnoGames | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | +| Instana | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) | +| Integros | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | +| Ippon Technologies | Technology Consulting | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=205) | +| Ivi | Online Cinema | Analytics, Monitoring | — | — | [Article in Russian, Jan 2018](https://habr.com/en/company/ivi/blog/347408/) | +| Jinshuju 金数据 | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | +| Kodiak Data | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | +| Kontur | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | +| Kuaishou | Video | — | — | — | [ClickHouse Meetup, October 2018](https://clickhouse.tech/blog/en/2018/clickhouse-community-meetup-in-beijing-on-october-28-2018/) | +| Lawrence Berkeley National Laboratory | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | +| LifeStreet | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | +| 
Mail.ru Cloud Solutions | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | +| Marilyn | Advertising | Statistics | — | — | [Talk in Russian, June 2017](https://www.youtube.com/watch?v=iXlIgx2khwc) | +| Mello | Marketing | Analytics | 1 server | — | [Article, Oct 2020](https://vc.ru/marketing/166180-razrabotka-tipovogo-otcheta-skvoznoy-analitiki) | +| MessageBird | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | +| MindsDB | Machine Learning | Main Product | — | — | [Official Website](https://www.mindsdb.com/blog/machine-learning-models-as-tables-in-ch) |x +| MUX | Online Video | Video Analytics | — | — | [Talk in English, August 2019](https://altinity.com/presentations/2019/8/13/how-clickhouse-became-the-default-analytics-database-for-mux/) | +| MGID | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) | +| NOC Project | Network Monitoring | Analytics | Main Product | — | [Official Website](https://getnoc.com/features/big-data/) | +| Nuna Inc. | Health Data Analytics | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=170) | +| OneAPM | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | +| Panelbear | Analytics | Monitoring and Analytics | — | — | [Tech Stack, November 2020](https://panelbear.com/blog/tech-stack/) | +| Percent 百分点 | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | +| Percona | Performance analysis | Percona Monitoring and Management | — | — | [Official website, Mar 2020](https://www.percona.com/blog/2020/03/30/advanced-query-analysis-in-percona-monitoring-and-management-with-direct-clickhouse-access/) | +| Plausible | Analytics | Main Product | — | — | [Blog post, June 2020](https://twitter.com/PlausibleHQ/status/1273889629087969280) | +| PostHog | Product Analytics | Main Product | — | — | [Release Notes, Oct 2020](https://posthog.com/blog/the-posthog-array-1-15-0) | +| Postmates | Delivery | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=188) | +| Pragma Innovation | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | +| PRANA | Industrial predictive analytics | Main product | — | — | [News (russian), Feb 2021](https://habr.com/en/news/t/541392/) | +| QINGCLOUD | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | +| Qrator | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | +| Raiffeisenbank | Banking | Analytics | — | — | [Lecture in Russian, December 2020](https://cs.hse.ru/announcements/421965599.html) | +| Rambler | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | +| Retell | Speech synthesis 
| Analytics | — | — | [Blog Article, August 2020](https://vc.ru/services/153732-kak-sozdat-audiostati-na-vashem-sayte-i-zachem-eto-nuzhno) | +| Rspamd | Antispam | Analytics | — | — | [Official Website](https://rspamd.com/doc/modules/clickhouse.html) | +| RuSIEM | SIEM | Main Product | — | — | [Official Website](https://rusiem.com/en/products/architecture) | +| S7 Airlines | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | +| scireum GmbH | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | +| Segment | Data processing | Main product | 9 * i3en.3xlarge nodes 7.5TB NVME SSDs, 96GB Memory, 12 vCPUs | — | [Slides, 2019](https://slides.com/abraithwaite/segment-clickhouse) | +| SEMrush | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | +| Sentry | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | +| seo.do | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | +| SGK | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | +| Sina | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | +| SMI2 | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | +| Splunk | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | +| Spotify | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | +| Staffcop | Information Security | Main Product | — | — | [Official website, Documentation](https://www.staffcop.ru/sce43) | +| Suning | E-Commerce | User behaviour analytics | — | — | [Blog article](https://www.sohu.com/a/434152235_411876) | +| Teralytics | Mobility | Analytics | — | — | [Tech blog](https://www.teralytics.net/knowledge-hub/visualizing-mobility-data-the-scalability-challenge) | +| Tencent | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | +| Tencent | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | +| Tencent Music Entertainment (TME) | BigData | Data processing | — | — | [Blog in Chinese, June 2020](https://cloud.tencent.com/developer/article/1637840) | +| Traffic Stars | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | +| Uber | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) | +| VKontakte | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 
2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | +| Walmart Labs | Internet, Retail | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=144) | +| Wargaming | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | +| Wisebits | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | +| Workato | Automation Software | — | — | — | [Talk in English, July 2020](https://youtu.be/GMiXCMFDMow?t=334) | +| Xiaoxin Tech | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | +| Ximalaya | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) | +| Yandex Cloud | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | +| Yandex DataLens | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | +| Yandex Market | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | +| Yandex Metrica | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) | +| ЦВТ | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | +| МКБ | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | +| ЦФТ | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) | +| kakaocorp | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) | [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index ea37a22c165..f4206f5d70c 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -5,7 +5,7 @@ toc_title: Data Backup # Data Backup {#data-backup} -While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). However, these safeguards don’t cover all possible cases and can be circumvented. 
+While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards don’t cover all possible cases and can be circumvented.

In order to effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data **in advance**.
diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md
new file mode 100644
index 00000000000..95f80f192f5
--- /dev/null
+++ b/docs/en/operations/external-authenticators/index.md
@@ -0,0 +1,13 @@
+---
+toc_folder_title: External User Authenticators and Directories
+toc_priority: 48
+toc_title: Introduction
+---
+
+# External User Authenticators and Directories {#external-authenticators}
+
+ClickHouse supports authenticating and managing users using external services.
+
+The following external authenticators and directories are supported:
+
+- [LDAP](./ldap.md#external-authenticators-ldap) [Authenticator](./ldap.md#ldap-external-authenticator) and [Directory](./ldap.md#ldap-external-user-directory)
diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md
new file mode 100644
index 00000000000..36a13227852
--- /dev/null
+++ b/docs/en/operations/external-authenticators/ldap.md
@@ -0,0 +1,156 @@
+# LDAP {#external-authenticators-ldap}
+
+An LDAP server can be used to authenticate ClickHouse users. There are two different approaches for doing this:
+
+- use LDAP as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths
+- use LDAP as an external user directory and allow locally undefined users to be authenticated if they exist on the LDAP server
+
+For both of these approaches, an internally named LDAP server must be defined in the ClickHouse config so that other parts of the config are able to refer to it.
+
+## LDAP Server Definition {#ldap-server-definition}
+
+To define an LDAP server you must add an `ldap_servers` section to the `config.xml`. For example,
+
+```xml
+<yandex>
+    <ldap_servers>
+        <my_ldap_server>
+            <host>localhost</host>
+            <port>636</port>
+            <bind_dn>uid={user_name},ou=users,dc=example,dc=com</bind_dn>
+            <verification_cooldown>300</verification_cooldown>
+            <enable_tls>yes</enable_tls>
+            <tls_minimum_protocol_version>tls1.2</tls_minimum_protocol_version>
+            <tls_require_cert>demand</tls_require_cert>
+            <tls_cert_file>/path/to/tls_cert_file</tls_cert_file>
+            <tls_key_file>/path/to/tls_key_file</tls_key_file>
+            <tls_ca_cert_file>/path/to/tls_ca_cert_file</tls_ca_cert_file>
+            <tls_ca_cert_dir>/path/to/tls_ca_cert_dir</tls_ca_cert_dir>
+            <tls_cipher_suite>ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384</tls_cipher_suite>
+        </my_ldap_server>
+    </ldap_servers>
+</yandex>
+```
+
+Note that you can define multiple LDAP servers inside the `ldap_servers` section using distinct names.
+
+Parameters:
+
+- `host` - LDAP server hostname or IP, this parameter is mandatory and cannot be empty.
+- `port` - LDAP server port, default is `636` if `enable_tls` is set to `true`, `389` otherwise.
+- `bind_dn` - template used to construct the DN to bind to.
+    - The resulting DN will be constructed by replacing all `{user_name}` substrings of the
+      template with the actual user name during each authentication attempt.
+- `verification_cooldown` - a period of time, in seconds, after a successful bind attempt,
+    during which the user will be assumed to be successfully authenticated for all consecutive
+    requests without contacting the LDAP server.
+    - Specify `0` (the default) to disable caching and force contacting the LDAP server for each authentication request.
+- `enable_tls` - flag to trigger use of secure connection to the LDAP server.
+    - Specify `no` for plain text `ldap://` protocol (not recommended).
+    - Specify `yes` for LDAP over SSL/TLS `ldaps://` protocol (recommended, the default).
+    - Specify `starttls` for legacy StartTLS protocol (plain text `ldap://` protocol, upgraded to TLS).
+- `tls_minimum_protocol_version` - the minimum protocol version of SSL/TLS.
+    - Accepted values are: `ssl2`, `ssl3`, `tls1.0`, `tls1.1`, `tls1.2` (the default).
+- `tls_require_cert` - SSL/TLS peer certificate verification behavior.
+    - Accepted values are: `never`, `allow`, `try`, `demand` (the default).
+- `tls_cert_file` - path to certificate file.
+- `tls_key_file` - path to certificate key file.
+- `tls_ca_cert_file` - path to CA certificate file.
+- `tls_ca_cert_dir` - path to the directory containing CA certificates.
+- `tls_cipher_suite` - allowed cipher suite (in OpenSSL notation).
+
+## LDAP External Authenticator {#ldap-external-authenticator}
+
+A remote LDAP server can be used as a method for verifying passwords for locally defined users (users defined in `users.xml` or in local access control paths). In order to achieve this, specify a previously defined LDAP server name instead of `password` or similar sections in the user definition.
+
+At each login attempt, ClickHouse will try to "bind" to the specified DN defined by the `bind_dn` parameter in the [LDAP server definition](#ldap-server-definition) using the provided credentials, and if successful, the user will be considered authenticated. This is often called a "simple bind" method.
+
+For example,
+
+```xml
+<yandex>
+    <!-- ... -->
+    <users>
+        <!-- ... -->
+        <my_user>
+            <!-- ... -->
+            <ldap>
+                <server>my_ldap_server</server>
+            </ldap>
+        </my_user>
+    </users>
+</yandex>
+```
+
+Note that the user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously.
+
+When SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled in ClickHouse, users that are authenticated by LDAP servers can also be created using the [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement) statement.
+
+```sql
+CREATE USER my_user IDENTIFIED WITH ldap_server BY 'my_ldap_server'
+```
+
+## LDAP External User Directory {#ldap-external-user-directory}
+
+In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. In order to achieve this, specify a previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section of the `config.xml` file.
+
+At each login attempt, ClickHouse will try to find the user definition locally and authenticate it as usual, but if the user is not defined, ClickHouse will assume it exists in the external LDAP directory, and will try to "bind" to the specified DN at the LDAP server using the provided credentials. If successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section.
Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then be assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled and roles are created using the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement. + +Example (goes into `config.xml`): + +```xml + + + + + + my_ldap_server + + + + + + ou=groups,dc=example,dc=com + subtree + (&(objectClass=groupOfNames)(member={bind_dn})) + cn + clickhouse_ + + + + +``` + +Note that `my_ldap_server` referred in the `ldap` section inside the `user_directories` section must be a previously +defined LDAP server that is configured in the `config.xml` (see [LDAP Server Definition](#ldap-server-definition)). + +Parameters: + +- `server` - one of LDAP server names defined in the `ldap_servers` config section above. + This parameter is mandatory and cannot be empty. +- `roles` - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. + - If no roles are specified here or assigned during role mapping (below), user will not be able + to perform any actions after authentication. +- `role_mapping` - section with LDAP search parameters and mapping rules. + - When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter` + and the name of the logged in user. For each entry found during that search, the value of the specified + attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed, + and the rest of the value becomes the name of a local role defined in ClickHouse, + which is expected to be created beforehand by the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement. + - There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied. + - `base_dn` - template used to construct the base DN for the LDAP search. + - The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}` + substrings of the template with the actual user name and bind DN during each LDAP search. + - `scope` - scope of the LDAP search. + - Accepted values are: `base`, `one_level`, `children`, `subtree` (the default). + - `search_filter` - template used to construct the search filter for the LDAP search. + - The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` + substrings of the template with the actual user name, bind DN, and base DN during each LDAP search. + - Note, that the special characters must be escaped properly in XML. + - `attribute` - attribute name whose values will be returned by the LDAP search. + - `prefix` - prefix, that will be expected to be in front of each string in the original + list of strings returned by the LDAP search. Prefix will be removed from the original + strings and resulting strings will be treated as local role names. Empty, by default. + diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md index c637ef03f71..56c3eaf6455 100644 --- a/docs/en/operations/quotas.md +++ b/docs/en/operations/quotas.md @@ -29,6 +29,8 @@ Let’s look at the section of the ‘users.xml’ file that defines quotas. 
0 + 0 + 0 0 0 0 @@ -48,6 +50,8 @@ The resource consumption calculated for each interval is output to the server lo 3600 1000 + 100 + 100 100 1000000000 100000000000 @@ -58,6 +62,8 @@ The resource consumption calculated for each interval is output to the server lo 86400 10000 + 10000 + 10000 1000 5000000000 500000000000 @@ -74,6 +80,10 @@ Here are the amounts that can be restricted: `queries` – The total number of requests. +`query_selects` – The total number of select requests. + +`query_inserts` – The total number of insert requests. + `errors` – The number of queries that threw an exception. `result_rows` – The total number of rows given as a result. diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index a1ed34f10bb..89fcbafe663 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -296,11 +296,33 @@ Useful for breaking away from a specific network interface. example.yandex.ru ``` +## interserver_https_port {#interserver-https-port} + +Port for exchanging data between ClickHouse servers over `HTTPS`. + +**Example** + +``` xml +9010 +``` + +## interserver_https_host {#interserver-https-host} + +Similar to `interserver_http_host`, except that this hostname can be used by other servers to access this server over `HTTPS`. + +**Example** + +``` xml +example.yandex.ru +``` + ## interserver_http_credentials {#server-settings-interserver-http-credentials} The username and password used to authenticate during [replication](../../engines/table-engines/mergetree-family/replication.md) with the Replicated\* engines. These credentials are used only for communication between replicas and are unrelated to credentials for ClickHouse clients. The server is checking these credentials for connecting replicas and use the same credentials when connecting to other replicas. So, these credentials should be set the same for all replicas in a cluster. By default, the authentication is not used. +**Note:** These credentials are common for replication through `HTTP` and `HTTPS`. + This section contains the following parameters: - `user` — username. diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index e0f7c79dcab..77b68715ba9 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -186,5 +186,16 @@ Possible values: Default value: auto (number of CPU cores). During startup ClickHouse reads all parts of all tables (reads files with metadata of parts) to build a list of all parts in memory. In some systems with a large number of parts this process can take a long time, and this time might be shortened by increasing `max_part_loading_threads` (if this process is not CPU and disk I/O bound). +## max_partitions_to_read {#max-partitions-to-read} + +Limits the maximum number of partitions that can be accessed in one query. + +The setting value specified when the table is created can be overridden via query-level setting. + +Possible values: + +- Any positive integer. + +Default value: -1 (unlimited). 
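+
+A minimal sketch of both forms (the table name `hits` and its schema are hypothetical, used only for illustration):
+
+``` sql
+CREATE TABLE hits (EventDate Date, UserID UInt64)
+ENGINE = MergeTree
+PARTITION BY toYYYYMM(EventDate)
+ORDER BY UserID
+SETTINGS max_partitions_to_read = 12;
+
+-- The table-level value can be overridden for a single query:
+SELECT count() FROM hits SETTINGS max_partitions_to_read = 2;
+```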
[Original article](https://clickhouse.tech/docs/en/operations/settings/merge_tree_settings/) diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index 3e15d9e6dea..ee834dca98a 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -139,7 +139,7 @@ You can assign a quotas set for the user. For a detailed description of quotas c ### user_name/databases {#user-namedatabases} -In this section, you can you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security. +In this section, you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security. **Example** diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 85a3b8bd941..6440f09bb40 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -312,7 +312,7 @@ Enables or disables parsing enum values as enum ids for TSV input format. Possible values: - 0 — Enum values are parsed as values. -- 1 — Enum values are parsed as enum IDs +- 1 — Enum values are parsed as enum IDs. Default value: 0. @@ -428,7 +428,7 @@ Possible values: - `'basic'` — Use basic parser. - ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `'2019-08-20 10:18:56'` or `2019-08-20`. + ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`. Default value: `'basic'`. @@ -443,19 +443,19 @@ Allows choosing different output formats of the text representation of date and Possible values: -- `'simple'` - Simple output format. +- `simple` - Simple output format. - Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `'2019-08-20 10:18:56'`. Calculation is performed according to the data type's time zone (if present) or server time zone. + Clickhouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone. -- `'iso'` - ISO output format. +- `iso` - ISO output format. - Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `'2019-08-20T10:18:56Z'`. Note that output is in UTC (`Z` means UTC). + Clickhouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC). -- `'unix_timestamp'` - Unix timestamp output format. +- `unix_timestamp` - Unix timestamp output format. - Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `'1566285536'`. + Clickhouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`. -Default value: `'simple'`. +Default value: `simple`. See also: @@ -1944,6 +1944,21 @@ Possible values: Default value: 16. +## background_message_broker_schedule_pool_size {#background_message_broker_schedule_pool_size} + +Sets the number of threads performing background tasks for message streaming. This setting is applied at the ClickHouse server start and can’t be changed in a user session. + +Possible values: + +- Any positive integer. 
+ +Default value: 16. + +**See Also** + +- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine +- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine + ## validate_polygons {#validate_polygons} Enables or disables throwing an exception in the [pointInPolygon](../../sql-reference/functions/geo/index.md#pointinpolygon) function, if the polygon is self-intersecting or self-tangent. @@ -2577,4 +2592,90 @@ Possible values: Default value: `16`. +## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability} + +Sets the probability that the ClickHouse can start a trace for executed queries (if no parent [trace context](https://www.w3.org/TR/trace-context/) is supplied). + +Possible values: + +- 0 — The trace for all executed queries is disabled (if no parent trace context is supplied). +- Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries. +- 1 — The trace for all executed queries is enabled. + +Default value: `0`. + +## optimize_on_insert {#optimize-on-insert} + +Enables or disables data transformation before the insertion, as if merge was done on this block (according to table engine). + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +**Example** + +The difference between enabled and disabled: + +Query: + +```sql +SET optimize_on_insert = 1; + +CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable; + +INSERT INTO test1 SELECT number % 2 FROM numbers(5); + +SELECT * FROM test1; + +SET optimize_on_insert = 0; + +CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable; + +INSERT INTO test2 SELECT number % 2 FROM numbers(5); + +SELECT * FROM test2; +``` + +Result: + +``` text +┌─FirstTable─┐ +│ 0 │ +│ 1 │ +└────────────┘ + +┌─SecondTable─┐ +│ 0 │ +│ 0 │ +│ 0 │ +│ 1 │ +│ 1 │ +└─────────────┘ +``` + +Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour. + [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) + +## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists} + +Allows to select data from a file engine table without file. + +Possible values: +- 0 — `SELECT` throws exception. +- 1 — `SELECT` returns empty result. + +Default value: `0`. + +## engine_file_truncate_on_insert {#engine-file-truncate-on-insert} + +Enables or disables truncate before insert in file engine tables. + +Possible values: +- 0 — Disabled. +- 1 — Enabled. + +Default value: `0`. diff --git a/docs/en/operations/system-tables/distributed_ddl_queue.md b/docs/en/operations/system-tables/distributed_ddl_queue.md index 643bdee6def..c252458af8a 100644 --- a/docs/en/operations/system-tables/distributed_ddl_queue.md +++ b/docs/en/operations/system-tables/distributed_ddl_queue.md @@ -1,22 +1,21 @@ # system.distributed_ddl_queue {#system_tables-distributed_ddl_queue} -Contains information about distributed ddl queries (ON CLUSTER queries) that were executed on a cluster. +Contains information about [distributed ddl queries (ON CLUSTER clause)](../../sql-reference/distributed-ddl.md) that were executed on a cluster. Columns: -- `entry` ([String](../../sql-reference/data-types/string.md)) - Query id. 
-- `host_name` ([String](../../sql-reference/data-types/string.md)) - Hostname. -- `host_address` ([String](../../sql-reference/data-types/string.md)) - IP address that the Hostname resolves to. -- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) - Host Port. -- `status` ([Enum](../../sql-reference/data-types/enum.md)) - Stats of the query. -- `cluster` ([String](../../sql-reference/data-types/string.md)) - Cluster name. -- `query` ([String](../../sql-reference/data-types/string.md)) - Query executed. -- `initiator` ([String](../../sql-reference/data-types/string.md)) - Nod that executed the query. -- `query_start_time` ([Date](../../sql-reference/data-types/date.md)) — Query start time. -- `query_finish_time` ([Date](../../sql-reference/data-types/date.md)) — Query finish time. -- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution in milliseconds. -- `exception_code` ([Enum](../../sql-reference/data-types/enum.md)) - Exception code from ZooKeeper. - +- `entry` ([String](../../sql-reference/data-types/string.md)) — Query id. +- `host_name` ([String](../../sql-reference/data-types/string.md)) — Hostname. +- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP address that the Hostname resolves to. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host Port. +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the query. +- `cluster` ([String](../../sql-reference/data-types/string.md)) — Cluster name. +- `query` ([String](../../sql-reference/data-types/string.md)) — Query executed. +- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query. +- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time. +- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution (in milliseconds). +- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ZooKeeper](../../operations/tips.md#zookeeper). **Example** @@ -62,6 +61,5 @@ exception_code: ZOK 2 rows in set. Elapsed: 0.025 sec. ``` - [Original article](https://clickhouse.tech/docs/en/operations/system_tables/distributed_ddl_queuedistributed_ddl_queue.md) \ No newline at end of file diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md new file mode 100644 index 00000000000..e45a989742c --- /dev/null +++ b/docs/en/operations/system-tables/opentelemetry_span_log.md @@ -0,0 +1,53 @@ +# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log} + +Contains information about [trace spans](https://opentracing.io/docs/overview/spans/) for executed queries. + +Columns: + +- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md) — ID of the trace for executed query. + +- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`. + +- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`. + +- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation. + +- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds). 
+ +- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds). + +- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`. + +- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard. + +- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard. + +**Example** + +Query: + +``` sql +SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914 +span_id: 701487461015578150 +parent_span_id: 2991972114672045096 +operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl() +start_time_us: 1612374594529090 +finish_time_us: 1612374594529108 +finish_date: 2021-02-03 +attribute.names: [] +attribute.values: [] +``` + +**See Also** + +- [OpenTelemetry](../../operations/opentelemetry.md) + +[Original article](https://clickhouse.tech/docs/en/operations/system_tables/opentelemetry_span_log) diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md index 9aa95b1a493..579fdaefb0a 100644 --- a/docs/en/operations/system-tables/part_log.md +++ b/docs/en/operations/system-tables/part_log.md @@ -6,29 +6,65 @@ This table contains information about events that occurred with [data parts](../ The `system.part_log` table contains the following columns: -- `event_type` (Enum) — Type of the event that occurred with the data part. Can have one of the following values: +- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part. +- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values: - `NEW_PART` — Inserting of a new data part. - `MERGE_PARTS` — Merging of data parts. - `DOWNLOAD_PART` — Downloading a data part. - `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition). - `MUTATE_PART` — Mutating of a data part. - `MOVE_PART` — Moving the data part from the one disk to another one. -- `event_date` (Date) — Event date. -- `event_time` (DateTime) — Event time. -- `duration_ms` (UInt64) — Duration. -- `database` (String) — Name of the database the data part is in. -- `table` (String) — Name of the table the data part is in. -- `part_name` (String) — Name of the data part. -- `partition_id` (String) — ID of the partition that the data part was inserted to. The column takes the ‘all’ value if the partitioning is by `tuple()`. -- `rows` (UInt64) — The number of rows in the data part. -- `size_in_bytes` (UInt64) — Size of the data part in bytes. -- `merged_from` (Array(String)) — An array of names of the parts which the current part was made up from (after the merge). -- `bytes_uncompressed` (UInt64) — Size of uncompressed bytes. -- `read_rows` (UInt64) — The number of rows was read during the merge. 
-- `read_bytes` (UInt64) — The number of bytes was read during the merge. -- `error` (UInt16) — The code number of the occurred error. -- `exception` (String) — Text message of the occurred error. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds precision. + +- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration. +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in. +- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in. +- `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part. +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`. +- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files. +- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows in the data part. +- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes. +- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — An array of names of the parts which the current part was made up from (after the merge). +- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of uncompressed bytes. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows was read during the merge. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes was read during the merge. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread. +- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The code number of the occurred error. +- `exception` ([String](../../sql-reference/data-types/string.md)) — Text message of the occurred error. The `system.part_log` table is created after the first inserting data to the `MergeTree` table. +**Example** + +``` sql +SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31 +event_type: NewPart +event_date: 2021-02-02 +event_time: 2021-02-02 11:14:28 +event_time_microseconds: 2021-02-02 11:14:28.861919 +duration_ms: 35 +database: default +table: log_mt_2 +part_name: all_1_1_0 +partition_id: all +path_on_disk: db/data/default/log_mt_2/all_1_1_0/ +rows: 115418 +size_in_bytes: 1074311 +merged_from: [] +bytes_uncompressed: 0 +read_rows: 0 +read_bytes: 0 +peak_memory_usage: 0 +error: 0 +exception: +``` + [Original article](https://clickhouse.tech/docs/en/operations/system_tables/part_log) diff --git a/docs/en/operations/system-tables/quota_limits.md b/docs/en/operations/system-tables/quota_limits.md index 065296f5df3..c2dcb4db34d 100644 --- a/docs/en/operations/system-tables/quota_limits.md +++ b/docs/en/operations/system-tables/quota_limits.md @@ -9,6 +9,8 @@ Columns: - `0` — Interval is not randomized. - `1` — Interval is randomized. 
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of queries. +- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select queries. +- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert queries. - `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors. - `max_result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of result rows. - `max_result_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of RAM volume in bytes used to store a queries result. diff --git a/docs/en/operations/system-tables/quota_usage.md b/docs/en/operations/system-tables/quota_usage.md index 0eb59fd6453..17af9ad9a30 100644 --- a/docs/en/operations/system-tables/quota_usage.md +++ b/docs/en/operations/system-tables/quota_usage.md @@ -9,6 +9,8 @@ Columns: - `end_time`([Nullable](../../sql-reference/data-types/nullable.md)([DateTime](../../sql-reference/data-types/datetime.md))) — End time for calculating resource consumption. - `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds. - `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests on this interval. +- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests on this interval. +- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests on this interval. - `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests. - `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception. - `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors. diff --git a/docs/en/operations/system-tables/quotas.md b/docs/en/operations/system-tables/quotas.md index f4f52a4a131..3e797c9bdc6 100644 --- a/docs/en/operations/system-tables/quotas.md +++ b/docs/en/operations/system-tables/quotas.md @@ -7,16 +7,16 @@ Columns: - `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Quota ID. - `storage`([String](../../sql-reference/data-types/string.md)) — Storage of quotas. Possible value: “users.xml” if a quota configured in the users.xml file, “disk” if a quota configured by an SQL-query. - `keys` ([Array](../../sql-reference/data-types/array.md)([Enum8](../../sql-reference/data-types/enum.md))) — Key specifies how the quota should be shared. If two connections use the same quota and key, they share the same amounts of resources. Values: -- `[]` — All users share the same quota. 
-- `['user_name']` — Connections with the same user name share the same quota. -- `['ip_address']` — Connections from the same IP share the same quota. -- `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header. -- `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `user_name`. -- `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `ip_address`. + - `[]` — All users share the same quota. + - `['user_name']` — Connections with the same user name share the same quota. + - `['ip_address']` — Connections from the same IP share the same quota. + - `['client_key']` — Connections with the same key share the same quota. A key must be explicitly provided by a client. When using [clickhouse-client](../../interfaces/cli.md), pass a key value in the `--quota-key` parameter, or use the `quota_key` parameter in the client configuration file. When using HTTP interface, use the `X-ClickHouse-Quota` header. + - `['user_name', 'client_key']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `user_name`. + - `['client_key', 'ip_address']` — Connections with the same `client_key` share the same quota. If a key isn’t provided by a client, the qouta is tracked for `ip_address`. - `durations` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Time interval lengths in seconds. - `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows which users the quota is applied to. Values: -- `0` — The quota applies to users specify in the `apply_to_list`. -- `1` — The quota applies to all users except those listed in `apply_to_except`. + - `0` — The quota applies to users specify in the `apply_to_list`. + - `1` — The quota applies to all users except those listed in `apply_to_except`. - `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/[roles](../../operations/access-rights.md#role-management) that the quota should be applied to. - `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of user names/roles that the quota should not apply to. diff --git a/docs/en/operations/system-tables/quotas_usage.md b/docs/en/operations/system-tables/quotas_usage.md index ed6be820b26..31aafd3e697 100644 --- a/docs/en/operations/system-tables/quotas_usage.md +++ b/docs/en/operations/system-tables/quotas_usage.md @@ -11,6 +11,10 @@ Columns: - `duration` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Length of the time interval for calculating resource consumption, in seconds. - `queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of requests in this interval. 
- `max_queries` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of requests.
+- `query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of select requests in this interval.
+- `max_query_selects` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of select requests.
+- `query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of insert requests in this interval.
+- `max_query_inserts` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of insert requests.
- `errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The number of queries that threw an exception.
- `max_errors` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of errors.
- `result_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total number of rows given as a result.
diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md
index 8107f60b808..2903e0d3bd7 100644
--- a/docs/en/operations/system-tables/trace_log.md
+++ b/docs/en/operations/system-tables/trace_log.md
@@ -12,7 +12,7 @@ Columns:

- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment.

-- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment with microseconds precision.
+- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Timestamp of the sampling moment with microseconds precision.

- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds.

diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md
index ddb4d305964..82ace5e81dc 100644
--- a/docs/en/operations/system-tables/zookeeper.md
+++ b/docs/en/operations/system-tables/zookeeper.md
@@ -1,12 +1,16 @@
# system.zookeeper {#system-zookeeper}

The table does not exist if ZooKeeper is not configured. Allows reading data from the ZooKeeper cluster defined in the config.
-The query must have a ‘path’ equality condition in the WHERE clause. This is the path in ZooKeeper for the children that you want to get data for.
+The query must either have a ‘path =’ condition or a `path IN` condition set with the `WHERE` clause as shown below. This corresponds to the path of the children in ZooKeeper that you want to get data for.

The query `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` outputs data for all children on the `/clickhouse` node.
To output data for all root nodes, write path = ‘/’.
If the path specified in ‘path’ doesn’t exist, an exception will be thrown.

+The query `SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` outputs data for all children on the `/` and `/clickhouse` nodes.
+If any path in the specified ‘path’ collection does not exist, an exception will be thrown.
+This can be used to perform a batch of ZooKeeper path queries.
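+
+For example, the batched form can be issued as a single query (a minimal sketch using the `name` column described below):
+
+``` sql
+SELECT name FROM system.zookeeper WHERE path IN ('/', '/clickhouse');
+```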
+ Columns: - `name` (String) — The name of the node. diff --git a/docs/en/operations/update.md b/docs/en/operations/update.md index edacf1ff973..9fa9c44e130 100644 --- a/docs/en/operations/update.md +++ b/docs/en/operations/update.md @@ -1,9 +1,9 @@ --- toc_priority: 47 -toc_title: ClickHouse Update +toc_title: ClickHouse Upgrade --- -# ClickHouse Update {#clickhouse-update} +# ClickHouse Upgrade {#clickhouse-upgrade} If ClickHouse was installed from `deb` packages, execute the following commands on the server: @@ -16,3 +16,19 @@ $ sudo service clickhouse-server restart If you installed ClickHouse using something other than the recommended `deb` packages, use the appropriate update method. ClickHouse does not support a distributed update. The operation should be performed consecutively on each separate server. Do not update all the servers on a cluster simultaneously, or the cluster will be unavailable for some time. + +The upgrade of older version of ClickHouse to specific version: + +As an example: + +`xx.yy.a.b` is a current stable version. The latest stable version could be found [here](https://github.com/ClickHouse/ClickHouse/releases) + +```bash +$ sudo apt-get update +$ sudo apt-get install clickhouse-server=xx.yy.a.b clickhouse-client=xx.yy.a.b clickhouse-common-static=xx.yy.a.b +$ sudo service clickhouse-server restart +``` + + + + diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md index 431968bc629..015c90e90c7 100644 --- a/docs/en/sql-reference/aggregate-functions/combinators.md +++ b/docs/en/sql-reference/aggregate-functions/combinators.md @@ -72,7 +72,7 @@ If an aggregate function doesn’t have input values, with this combinator it re OrDefault(x) ``` -**Parameters** +**Arguments** - `x` — Aggregate function parameters. @@ -132,7 +132,7 @@ This combinator converts a result of an aggregate function to the [Nullable](../ OrNull(x) ``` -**Parameters** +**Arguments** - `x` — Aggregate function parameters. @@ -189,7 +189,7 @@ Lets you divide data into groups, and then separately aggregates the data in tho Resample(start, end, step)(, resampling_key) ``` -**Parameters** +**Arguments** - `start` — Starting value of the whole required interval for `resampling_key` values. - `stop` — Ending value of the whole required interval for `resampling_key` values. The whole interval doesn’t include the `stop` value `[start, stop)`. diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 3b02e145ff4..035bc91b9ed 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -17,10 +17,13 @@ histogram(number_of_bins)(values) The functions uses [A Streaming Parallel Decision Tree Algorithm](http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf). The borders of histogram bins are adjusted as new data enters a function. In common case, the widths of bins are not equal. +**Arguments** + +`values` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in input values. + **Parameters** `number_of_bins` — Upper limit for the number of bins in the histogram. The function automatically calculates the number of bins. It tries to reach the specified number of bins, but if it fails, it uses fewer bins. -`values` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in input values. 
**Returned values** @@ -89,14 +92,16 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...) !!! warning "Warning" Events that occur at the same second may lay in the sequence in an undefined order affecting the result. -**Parameters** - -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +**Arguments** - `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. - `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +**Parameters** + +- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). + **Returned values** - 1, if the pattern is matched. @@ -176,14 +181,16 @@ Counts the number of event chains that matched the pattern. The function searche sequenceCount(pattern)(timestamp, cond1, cond2, ...) ``` -**Parameters** - -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +**Arguments** - `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. - `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +**Parameters** + +- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). + **Returned values** - Number of non-overlapping event chains that are matched. @@ -239,14 +246,17 @@ The function works according to the algorithm: windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN) ``` -**Parameters** +**Arguments** -- `window` — Length of the sliding window in seconds. -- `mode` - It is an optional argument. - - `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values. - `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1). - `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). +**Parameters** + +- `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`. +- `mode` - It is an optional parameter. + - `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values. + **Returned value** The maximum number of consecutive triggered conditions from the chain within the sliding time window. @@ -324,7 +334,7 @@ The conditions, except the first, apply in pairs: the result of the second will retention(cond1, cond2, ..., cond32); ``` -**Parameters** +**Arguments** - `cond` — an expression that returns a `UInt8` result (1 or 0). 
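+
+A minimal usage sketch (the table `retention_test` with `date` and `uid` columns is hypothetical):
+
+``` sql
+SELECT
+    uid,
+    retention(date = '2020-01-01', date = '2020-01-02', date = '2020-01-03') AS r
+FROM retention_test
+WHERE date IN ('2020-01-01', '2020-01-02', '2020-01-03')
+GROUP BY uid;
+```
+
+Here `r[1]` shows whether the first condition held for the user, and `r[2]`, `r[3]` are 1 only if the corresponding condition and the first one both held.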
diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 35e87d49e60..7639117042f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -4,13 +4,42 @@ toc_priority: 106 # argMax {#agg-function-argmax} -Syntax: `argMax(arg, val)` or `argMax(tuple(arg, val))` +Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, returns the first of these values encountered. -Calculates the `arg` value for a maximum `val` value. If there are several different values of `arg` for maximum values of `val`, the first of these values encountered is output. +Tuple version of this function will return the tuple with the maximum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). -Tuple version of this function will return the tuple with the maximum `val` value. It is convinient for use with `SimpleAggregateFunction`. +**Syntax** -**Example:** +``` sql +argMax(arg, val) +``` + +or + +``` sql +argMax(tuple(arg, val)) +``` + +**Arguments** + +- `arg` — Argument. +- `val` — Value. + +**Returned value** + +- `arg` value that corresponds to maximum `val` value. + +Type: matches `arg` type. + +For tuple in the input: + +- Tuple `(arg, val)`, where `val` is the maximum value and `arg` is a corresponding value. + +Type: [Tuple](../../../sql-reference/data-types/tuple.md). + +**Example** + +Input table: ``` text ┌─user─────┬─salary─┐ @@ -20,12 +49,18 @@ Tuple version of this function will return the tuple with the maximum `val` valu └──────────┴────────┘ ``` +Query: + ``` sql -SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary +SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary; ``` +Result: + ``` text ┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐ │ director │ ('director',5000) │ └──────────────────────┴─────────────────────────────┘ ``` + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index 72c9bce6817..7ddc38cd28a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -4,13 +4,42 @@ toc_priority: 105 # argMin {#agg-function-argmin} -Syntax: `argMin(arg, val)` or `argMin(tuple(arg, val))` +Calculates the `arg` value for a minimum `val` value. If there are several different values of `arg` for minimum values of `val`, returns the first of these values encountered. -Calculates the `arg` value for a minimal `val` value. If there are several different values of `arg` for minimal values of `val`, the first of these values encountered is output. +Tuple version of this function will return the tuple with the minimum `val` value. It is convenient for use with [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). -Tuple version of this function will return the tuple with the minimal `val` value. It is convinient for use with `SimpleAggregateFunction`. +**Syntax** -**Example:** +``` sql +argMin(arg, val) +``` + +or + +``` sql +argMin(tuple(arg, val)) +``` + +**Arguments** + +- `arg` — Argument. +- `val` — Value. 
+ +**Returned value** + +- `arg` value that corresponds to minimum `val` value. + +Type: matches `arg` type. + +For tuple in the input: + +- Tuple `(arg, val)`, where `val` is the minimum value and `arg` is a corresponding value. + +Type: [Tuple](../../../sql-reference/data-types/tuple.md). + +**Example** + +Input table: ``` text ┌─user─────┬─salary─┐ @@ -20,12 +49,18 @@ Tuple version of this function will return the tuple with the minimal `val` valu └──────────┴────────┘ ``` +Query: + ``` sql -SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary +SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary; ``` +Result: + ``` text ┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐ │ worker │ ('worker',1000) │ └──────────────────────┴─────────────────────────────┘ ``` + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md index e2e6aace734..12dc4ac1e9d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md @@ -12,7 +12,7 @@ Calculates the arithmetic mean. avgWeighted(x) ``` -**Parameter** +**Arguments** - `x` — Values. diff --git a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md index 7b9c0de2755..2df09e560b4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md @@ -12,7 +12,7 @@ Calculates the [weighted arithmetic mean](https://en.wikipedia.org/wiki/Weighted avgWeighted(x, weight) ``` -**Parameters** +**Arguments** - `x` — Values. - `weight` — Weights of the values. diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index e5d31429e12..0a5aef2fe97 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -10,7 +10,7 @@ ClickHouse supports the following syntaxes for `count`: - `count(expr)` or `COUNT(DISTINCT expr)`. - `count()` or `COUNT(*)`. The `count()` syntax is ClickHouse-specific. -**Parameters** +**Arguments** The function can take: diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md new file mode 100644 index 00000000000..bb6f802ccaf --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md @@ -0,0 +1,19 @@ +--- +toc_priority: 141 +--- + +# deltaSum {#agg_functions-deltasum} + +Syntax: `deltaSum(value)` + +Adds the differences between consecutive rows. If the difference is negative, it is ignored. +`value` must be some integer or floating point type. 
+ +Example: + +```sql +select deltaSum(arrayJoin([1, 2, 3])); -- => 2 +select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3])); -- => 7 +select deltaSum(arrayJoin([2.25, 3, 4.5])); -- => 2.25 +``` + diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md index f4b8665a0a4..68456bf7844 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md @@ -17,7 +17,7 @@ If in one query several values are inserted into the same position, the function - If a query is executed in a single thread, the first one of the inserted values is used. - If a query is executed in multiple threads, the resulting value is an undetermined one of the inserted values. -**Parameters** +**Arguments** - `x` — Value to be inserted. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../../sql-reference/data-types/index.md). - `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md index 1cd40c2002f..c732efecf58 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md @@ -13,7 +13,7 @@ groupArrayMovingAvg(window_size)(numbers_for_summing) The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column. -**Parameters** +**Arguments** - `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. - `window_size` — Size of the calculation window. diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md index ef979cd5f6a..c3dfeda850e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md @@ -13,7 +13,7 @@ groupArrayMovingSum(window_size)(numbers_for_summing) The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column. -**Parameters** +**Arguments** - `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. - `window_size` — Size of the calculation window. diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md index 36fa6a9d661..df0b8120eef 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md @@ -12,7 +12,7 @@ Creates an array of sample argument values. The size of the resulting array is l groupArraySample(max_size[, seed])(x) ``` -**Parameters** +**Arguments** - `max_size` — Maximum size of the resulting array. [UInt64](../../data-types/int-uint.md). - `seed` — Seed for the random number generator. Optional. 
[UInt64](../../data-types/int-uint.md). Default value: `123456`. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md index 9be73fd54ec..1275ad7536c 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md @@ -10,7 +10,7 @@ Applies bitwise `AND` for series of numbers. groupBitAnd(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md index 9367652db38..9317ef98783 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md @@ -10,7 +10,7 @@ Bitmap or Aggregate calculations from a unsigned integer column, return cardinal groupBitmap(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md index 7c0c89040bb..f59bb541a42 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md @@ -10,7 +10,7 @@ Calculations the AND of a bitmap column, return cardinality of type UInt64, if a groupBitmapAnd(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md index 894c6c90aab..a4d99fd29e3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md @@ -10,7 +10,7 @@ Calculations the OR of a bitmap column, return cardinality of type UInt64, if ad groupBitmapOr(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md index 5d0ec0fb097..834f088d02f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md @@ -10,7 +10,7 @@ Calculations the XOR of a bitmap column, return cardinality of type UInt64, if a groupBitmapOr(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md index 7383e620060..e427a9ad970 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md @@ -10,7 +10,7 @@ Applies bitwise `OR` for series of numbers. groupBitOr(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. 
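As a quick sanity check of the bitwise `OR` aggregation, a minimal sketch over a derived table (not taken from the original page):

```sql
SELECT groupBitOr(num) AS res
FROM (SELECT arrayJoin([4, 2, 1]) AS num);
-- 0b100 OR 0b010 OR 0b001 = 0b111 = 7
```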
diff --git a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md index 01026012b91..4b8323f92db 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md +++ b/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md @@ -10,7 +10,7 @@ Applies bitwise `XOR` for series of numbers. groupBitXor(expr) ``` -**Parameters** +**Arguments** `expr` – An expression that results in `UInt*` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md index ea44d5f1ddd..313d6bf81f5 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md +++ b/docs/en/sql-reference/aggregate-functions/reference/initializeAggregation.md @@ -13,7 +13,7 @@ Use it for tests or to process columns of types `AggregateFunction` and `Aggrega initializeAggregation (aggregate_function, column_1, column_2); ``` -**Parameters** +**Arguments** - `aggregate_function` — Name of the aggregation function. The state of this function — the creating one. [String](../../../sql-reference/data-types/string.md#string). - `column_n` — The column to translate it into the function as it's argument. [String](../../../sql-reference/data-types/string.md#string). diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md index 65e7e31b9b4..db402c99663 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md @@ -10,7 +10,7 @@ Computes the [kurtosis](https://en.wikipedia.org/wiki/Kurtosis) of a sequence. kurtPop(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md index 224bbbdb9e7..4bb9f76763b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md @@ -12,7 +12,7 @@ It represents an unbiased estimate of the kurtosis of a random variable if passe kurtSamp(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md new file mode 100644 index 00000000000..dc5fc45b878 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -0,0 +1,74 @@ +--- +toc_priority: 310 +toc_title: mannWhitneyUTest +--- + +# mannWhitneyUTest {#mannwhitneyutest} + +Applies the Mann-Whitney rank test to samples from two populations. + +**Syntax** + +``` sql +mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index) +``` + +Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. +The null hypothesis is that two populations are stochastically equal. Also one-sided hypothesises can be tested. This test does not assume that data have normal distribution. 
+ +**Arguments** + +- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Parameters** + +- `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). + - `'two-sided'`; + - `'greater'`; + - `'less'`. +- `continuity_correction` - if not 0 then continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md). + +**Returned values** + +[Tuple](../../../sql-reference/data-types/tuple.md) with two elements: + +- calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). + + +**Example** + +Input table: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 10 │ 0 │ +│ 11 │ 0 │ +│ 12 │ 0 │ +│ 1 │ 1 │ +│ 2 │ 1 │ +│ 3 │ 1 │ +└─────────────┴──────────────┘ +``` + +Query: + +``` sql +SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest; +``` + +Result: + +``` text +┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐ +│ (9,0.04042779918503192) │ +└────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [Mann–Whitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test) +- [Stochastic ordering](https://en.wikipedia.org/wiki/Stochastic_ordering) + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantile.md b/docs/en/sql-reference/aggregate-functions/reference/quantile.md index 77f858a1735..d625ef4cfd9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantile.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantile.md @@ -18,7 +18,7 @@ quantile(level)(expr) Alias: `median`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md index 6046447dd10..a20ac26f599 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md @@ -18,7 +18,7 @@ quantileDeterministic(level)(expr, determinator) Alias: `medianDeterministic`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). 
- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index a39f724f368..06ef7ccfbd3 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -18,7 +18,7 @@ quantileExact(level)(expr) Alias: `medianExact`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). @@ -77,7 +77,7 @@ quantileExact(level)(expr) Alias: `medianExactLow`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). @@ -128,7 +128,7 @@ quantileExactHigh(level)(expr) Alias: `medianExactHigh`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md index 3251f8298a6..210f44e7587 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md @@ -18,7 +18,7 @@ quantileExactWeighted(level)(expr, weight) Alias: `medianExactWeighted`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). 
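To see how the `weight` argument from the signature above affects the result, a small illustrative sketch (the sample data is made up):

```sql
-- Value 1 occurs with weight 9 and value 10 with weight 1, so the weighted median is 1.
SELECT quantileExactWeighted(0.5)(value, weight) AS median
FROM (SELECT [1, 10] AS vs, [9, 1] AS ws)
ARRAY JOIN vs AS value, ws AS weight;
```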
diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md index bda98ea338d..dcc665a68af 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md @@ -20,7 +20,7 @@ quantileTDigest(level)(expr) Alias: `medianTDigest`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index 309cbe95e95..56ef598f7e7 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -20,7 +20,7 @@ quantileTDigest(level)(expr) Alias: `medianTDigest`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md index 867e8b87e74..58ce6495a96 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md @@ -18,7 +18,7 @@ quantileTiming(level)(expr) Alias: `medianTiming`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md index 0f8606986c8..fb3b9dbf4d2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md @@ -18,7 +18,7 @@ quantileTimingWeighted(level)(expr, weight) Alias: `medianTimingWeighted`. -**Parameters** +**Arguments** - `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). 
@@ -79,6 +79,40 @@ Result: └───────────────────────────────────────────────┘ ``` +# quantilesTimingWeighted {#quantilestimingweighted} + +Same as `quantileTimingWeighted`, but accept multiple parameters with quantile levels and return an Array filled with many values of that quantiles. + + +**Example** + +Input table: + +``` text +┌─response_time─┬─weight─┐ +│ 68 │ 1 │ +│ 104 │ 2 │ +│ 112 │ 3 │ +│ 126 │ 2 │ +│ 138 │ 1 │ +│ 162 │ 1 │ +└───────────────┴────────┘ +``` + +Query: + +``` sql +SELECT quantilesTimingWeighted(0,5, 0.99)(response_time, weight) FROM t +``` + +Result: + +``` text +┌─quantilesTimingWeighted(0.5, 0.99)(response_time, weight)─┐ +│ [112,162] │ +└───────────────────────────────────────────────────────────┘ +``` + **See Also** - [median](../../../sql-reference/aggregate-functions/reference/median.md#median) diff --git a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md index dc23029f239..55ee1b8289b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md @@ -8,7 +8,7 @@ Computes a rank correlation coefficient. rankCorr(x, y) ``` -**Parameters** +**Arguments** - `x` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64). - `y` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64). diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md index d15a5ffdd47..b9dfc390f9d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewpop.md @@ -10,7 +10,7 @@ Computes the [skewness](https://en.wikipedia.org/wiki/Skewness) of a sequence. skewPop(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md index cb323f4b142..f7a6df8f507 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md @@ -12,7 +12,7 @@ It represents an unbiased estimate of the skewness of a random variable if passe skewSamp(expr) ``` -**Parameters** +**Arguments** `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) returning a number. diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md new file mode 100644 index 00000000000..a1d7ae33fe1 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md @@ -0,0 +1,66 @@ +--- +toc_priority: 300 +toc_title: studentTTest +--- + +# studentTTest {#studentttest} + +Applies Student's t-test to samples from two populations. + +**Syntax** + +``` sql +studentTTest(sample_data, sample_index) +``` + +Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. +The null hypothesis is that means of populations are equal. 
Normal distribution with equal variances is assumed. + +**Arguments** + +- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Returned values** + +[Tuple](../../../sql-reference/data-types/tuple.md) with two elements: + +- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). + + +**Example** + +Input table: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 20.3 │ 0 │ +│ 21.1 │ 0 │ +│ 21.9 │ 1 │ +│ 21.7 │ 0 │ +│ 19.9 │ 1 │ +│ 21.8 │ 1 │ +└─────────────┴──────────────┘ +``` + +Query: + +``` sql +SELECT studentTTest(sample_data, sample_index) FROM student_ttest; +``` + +Result: + +``` text +┌─studentTTest(sample_data, sample_index)───┐ +│ (-0.21739130434783777,0.8385421208415731) │ +└───────────────────────────────────────────┘ +``` + +**See Also** + +- [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test) +- [welchTTest function](welchttest.md#welchttest) + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/studentttest/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md index 004a67d33af..b3e79803ba1 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md @@ -16,7 +16,7 @@ This function doesn’t provide a guaranteed result. In certain situations, erro We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`. -**Parameters** +**Arguments** - ‘N’ is the number of elements to return. diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md index b597317f44e..02b9f77ea6f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md @@ -12,7 +12,7 @@ Similar to `topK` but takes one additional argument of integer type - `weight`. topKWeighted(N)(x, weight) ``` -**Parameters** +**Arguments** - `N` — The number of elements to return. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md index 81d1ec6761e..7ba2cdc6cb8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniq.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md @@ -10,7 +10,7 @@ Calculates the approximate number of different values of the argument. uniq(x[, ...]) ``` -**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index c52486bc38f..4434686ae61 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -12,7 +12,7 @@ uniqCombined(HLL_precision)(x[, ...]) The `uniqCombined` function is a good choice for calculating the number of different values. 
-**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md index 9a6224533c8..eee675016ee 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md @@ -14,7 +14,7 @@ Use the `uniqExact` function if you absolutely need an exact result. Otherwise u The `uniqExact` function uses more memory than `uniq`, because the size of the state has unbounded growth as the number of different values increases. -**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md index fcddc22cc46..5b23ea81eae 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md @@ -10,7 +10,7 @@ Calculates the approximate number of different argument values, using the [Hyper uniqHLL12(x[, ...]) ``` -**Parameters** +**Arguments** The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md new file mode 100644 index 00000000000..b391fb1d979 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md @@ -0,0 +1,66 @@ +--- +toc_priority: 301 +toc_title: welchTTest +--- + +# welchTTest {#welchttest} + +Applies Welch's t-test to samples from two populations. + +**Syntax** + +``` sql +welchTTest(sample_data, sample_index) +``` + +Values of both samples are in the `sample_data` column. If `sample_index` equals to 0 then the value in that row belongs to the sample from the first population. Otherwise it belongs to the sample from the second population. +The null hypothesis is that means of populations are equal. Normal distribution is assumed. Populations may have unequal variance. + +**Arguments** + +- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Returned values** + +[Tuple](../../../sql-reference/data-types/tuple.md) with two elements: + +- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). 
+ + +**Example** + +Input table: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 20.3 │ 0 │ +│ 22.1 │ 0 │ +│ 21.9 │ 0 │ +│ 18.9 │ 1 │ +│ 20.3 │ 1 │ +│ 19 │ 1 │ +└─────────────┴──────────────┘ +``` + +Query: + +``` sql +SELECT welchTTest(sample_data, sample_index) FROM welch_ttest; +``` + +Result: + +``` text +┌─welchTTest(sample_data, sample_index)─────┐ +│ (2.7988719532211235,0.051807360348581945) │ +└───────────────────────────────────────────┘ +``` + +**See Also** + +- [Welch's t-test](https://en.wikipedia.org/wiki/Welch%27s_t-test) +- [studentTTest function](studentttest.md#studentttest) + +[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/welchTTest/) diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index 48957498d63..41e35aaa96f 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -45,6 +45,8 @@ SELECT [1, 2] AS x, toTypeName(x) ## Working with Data Types {#working-with-data-types} +The maximum size of an array is limited to one million elements. + When creating an array on the fly, ClickHouse automatically defines the argument type as the narrowest data type that can store all the listed arguments. If there are any [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) or literal [NULL](../../sql-reference/syntax.md#null-literal) values, the type of an array element also becomes [Nullable](../../sql-reference/data-types/nullable.md). If ClickHouse couldn’t determine the data type, it generates an exception. For instance, this happens when trying to create an array with strings and numbers simultaneously (`SELECT array(1, 'a')`). diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md new file mode 100644 index 00000000000..58634e5b669 --- /dev/null +++ b/docs/en/sql-reference/data-types/map.md @@ -0,0 +1,83 @@ +--- +toc_priority: 65 +toc_title: Map(key, value) +--- + +# Map(key, value) {#data_type-map} + +`Map(key, value)` data type stores `key:value` pairs. + +**Parameters** +- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md). +- `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md). + +!!! warning "Warning" + Currently `Map` data type is an experimental feature. To work with it you must set `allow_experimental_map_type = 1`. + +To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax. This lookup works now with a linear complexity. + +**Examples** + +Consider the table: + +``` sql +CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory; +INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30}); +``` + +Select all `key2` values: + +```sql +SELECT a['key2'] FROM table_map; +``` +Result: + +```text +┌─arrayElement(a, 'key2')─┐ +│ 10 │ +│ 20 │ +│ 30 │ +└─────────────────────────┘ +``` + +If there's no such `key` in the `Map()` column, the query returns zeros for numerical values, empty strings or empty arrays. 
+ +```sql +INSERT INTO table_map VALUES ({'key3':100}), ({}); +SELECT a['key3'] FROM table_map; +``` + +Result: + +```text +┌─arrayElement(a, 'key3')─┐ +│ 100 │ +│ 0 │ +└─────────────────────────┘ +┌─arrayElement(a, 'key3')─┐ +│ 0 │ +│ 0 │ +│ 0 │ +└─────────────────────────┘ +``` + +## Convert Tuple to Map Type {#map-and-tuple} + +You can cast `Tuple()` as `Map()` using [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function: + +``` sql +SELECT CAST(([1, 2, 3], ['Ready', 'Steady', 'Go']), 'Map(UInt8, String)') AS map; +``` + +``` text +┌─map───────────────────────────┐ +│ {1:'Ready',2:'Steady',3:'Go'} │ +└───────────────────────────────┘ +``` + +**See Also** + +- [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function +- [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function + +[Original article](https://clickhouse.tech/docs/en/data-types/map/) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 05c418b1f15..efef91b4b09 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -208,8 +208,8 @@ This function returns the value for the specified `id`s and the date range that Details of the algorithm: - If the `id` is not found or a range is not found for the `id`, it returns the default value for the dictionary. -- If there are overlapping ranges, you can use any. -- If the range delimiter is `NULL` or an invalid date (such as 1900-01-01 or 2039-01-01), the range is left open. The range can be open on both sides. +- If there are overlapping ranges, it returns value for any (random) range. +- If the range delimiter is `NULL` or an invalid date (such as 1900-01-01), the range is open. The range can be open on both sides. Configuration example: diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index dc7727bdfd8..c9c418d57a4 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -61,7 +61,7 @@ Combines arrays passed as arguments. arrayConcat(arrays) ``` -**Parameters** +**Arguments** - `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. **Example** @@ -111,7 +111,7 @@ Checks whether one array is a subset of another. hasAll(set, subset) ``` -**Parameters** +**Arguments** - `set` – Array of any type with a set of elements. - `subset` – Array of any type with elements that should be tested to be a subset of `set`. @@ -149,7 +149,7 @@ Checks whether two arrays have intersection by some elements. hasAny(array1, array2) ``` -**Parameters** +**Arguments** - `array1` – Array of any type with a set of elements. - `array2` – Array of any type with a set of elements. @@ -191,7 +191,7 @@ For Example: - `hasSubstr([1,2,3,4], [2,3])` returns 1. However, `hasSubstr([1,2,3,4], [3,2])` will return `0`. - `hasSubstr([1,2,3,4], [1,2,3])` returns 1. However, `hasSubstr([1,2,3,4], [1,2,4])` will return `0`. -**Parameters** +**Arguments** - `array1` – Array of any type with a set of elements. - `array2` – Array of any type with a set of elements. @@ -369,7 +369,7 @@ Removes the last item from the array. 
arrayPopBack(array) ``` -**Parameters** +**Arguments** - `array` – Array. @@ -393,7 +393,7 @@ Removes the first item from the array. arrayPopFront(array) ``` -**Parameters** +**Arguments** - `array` – Array. @@ -417,7 +417,7 @@ Adds one item to the end of the array. arrayPushBack(array, single_value) ``` -**Parameters** +**Arguments** - `array` – Array. - `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. @@ -442,7 +442,7 @@ Adds one element to the beginning of the array. arrayPushFront(array, single_value) ``` -**Parameters** +**Arguments** - `array` – Array. - `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. @@ -467,7 +467,7 @@ Changes the length of the array. arrayResize(array, size[, extender]) ``` -**Parameters:** +**Arguments:** - `array` — Array. - `size` — Required length of the array. @@ -509,7 +509,7 @@ Returns a slice of the array. arraySlice(array, offset[, length]) ``` -**Parameters** +**Arguments** - `array` – Array of data. - `offset` – Indent from the edge of the array. A positive value indicates an offset on the left, and a negative value is an indent on the right. Numbering of the array items begins with 1. @@ -751,7 +751,7 @@ Calculates the difference between adjacent array elements. Returns an array wher arrayDifference(array) ``` -**Parameters** +**Arguments** - `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/). @@ -803,7 +803,7 @@ Takes an array, returns an array containing the distinct elements only. arrayDistinct(array) ``` -**Parameters** +**Arguments** - `array` – [Array](https://clickhouse.tech/docs/en/data_types/array/). @@ -871,7 +871,7 @@ Applies an aggregate function to array elements and returns its result. The name arrayReduce(agg_func, arr1, arr2, ..., arrN) ``` -**Parameters** +**Arguments** - `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). - `arr` — Any number of [array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. @@ -936,7 +936,7 @@ Applies an aggregate function to array elements in given ranges and returns an a arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN) ``` -**Parameters** +**Arguments** - `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). - `ranges` — The ranges to aggretate which should be an [array](../../sql-reference/data-types/array.md) of [tuples](../../sql-reference/data-types/tuple.md) which containing the index and the length of each range. @@ -1007,7 +1007,7 @@ flatten(array_of_arrays) Alias: `flatten`. 
-**Parameters** +**Arguments** - `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. @@ -1033,7 +1033,7 @@ Removes consecutive duplicate elements from an array. The order of result values arrayCompact(arr) ``` -**Parameters** +**Arguments** `arr` — The [array](../../sql-reference/data-types/array.md) to inspect. @@ -1069,7 +1069,7 @@ Combines multiple arrays into a single array. The resulting array contains the c arrayZip(arr1, arr2, ..., arrN) ``` -**Parameters** +**Arguments** - `arrN` — [Array](../../sql-reference/data-types/array.md). @@ -1107,7 +1107,7 @@ Calculate AUC (Area Under the Curve, which is a concept in machine learning, see arrayAUC(arr_scores, arr_labels) ``` -**Parameters** +**Arguments** - `arr_scores` — scores prediction model gives. - `arr_labels` — labels of samples, usually 1 for positive sample and 0 for negtive sample. @@ -1288,73 +1288,226 @@ Returns the index of the first element in the `arr1` array for which `func` retu Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. -## arrayMin(\[func,\] arr1, …) {#array-min} +## arrayMin {#array-min} -Returns the min of the `func` values. If the function is omitted, it just returns the min of the array elements. +Returns the minimum of elements in the source array. + +If the `func` function is specified, returns the mininum of elements converted by this function. Note that the `arrayMin` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -Examples: +**Syntax** + ```sql -SELECT arrayMin([1, 2, 4]) AS res +arrayMin([func,] arr) +``` + +**Arguments** + +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- The minimum of function values (or the array minimum). + +Type: if `func` is specified, matches `func` return value type, else matches the array elements type. + +**Examples** + +Query: + +```sql +SELECT arrayMin([1, 2, 4]) AS res; +``` + +Result: + +```text ┌─res─┐ │ 1 │ └─────┘ +``` +Query: -SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res +```sql +SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res; +``` + +Result: + +```text ┌─res─┐ │ -4 │ └─────┘ ``` -## arrayMax(\[func,\] arr1, …) {#array-max} +## arrayMax {#array-max} -Returns the max of the `func` values. If the function is omitted, it just returns the max of the array elements. +Returns the maximum of elements in the source array. + +If the `func` function is specified, returns the maximum of elements converted by this function. Note that the `arrayMax` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -Examples: +**Syntax** + ```sql -SELECT arrayMax([1, 2, 4]) AS res +arrayMax([func,] arr) +``` + +**Arguments** + +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- The maximum of function values (or the array maximum). + +Type: if `func` is specified, matches `func` return value type, else matches the array elements type. 
+ +**Examples** + +Query: + +```sql +SELECT arrayMax([1, 2, 4]) AS res; +``` + +Result: + +```text ┌─res─┐ │ 4 │ └─────┘ +``` +Query: -SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res +```sql +SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res; +``` + +Result: + +```text ┌─res─┐ │ -1 │ └─────┘ ``` -## arraySum(\[func,\] arr1, …) {#array-sum} +## arraySum {#array-sum} -Returns the sum of the `func` values. If the function is omitted, it just returns the sum of the array elements. +Returns the sum of elements in the source array. + +If the `func` function is specified, returns the sum of elements converted by this function. Note that the `arraySum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -Examples: +**Syntax** + ```sql -SELECT arraySum([2,3]) AS res +arraySum([func,] arr) +``` + +**Arguments** + +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- The sum of the function values (or the array sum). + +Type: for decimal numbers in source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](../../sql-reference/data-types/int-uint.md). + +**Examples** + +Query: + +```sql +SELECT arraySum([2, 3]) AS res; +``` + +Result: + +```text ┌─res─┐ │ 5 │ └─────┘ +``` +Query: -SELECT arraySum(x -> x*x, [2, 3]) AS res +```sql +SELECT arraySum(x -> x*x, [2, 3]) AS res; +``` + +Result: + +```text ┌─res─┐ │ 13 │ └─────┘ ``` +## arrayAvg {#array-avg} -## arrayAvg(\[func,\] arr1, …) {#array-avg} +Returns the average of elements in the source array. -Returns the average of the `func` values. If the function is omitted, it just returns the average of the array elements. +If the `func` function is specified, returns the average of elements converted by this function. Note that the `arrayAvg` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. +**Syntax** + +```sql +arrayAvg([func,] arr) +``` + +**Arguments** + +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- The average of function values (or the array average). + +Type: [Float64](../../sql-reference/data-types/float.md). + +**Examples** + +Query: + +```sql +SELECT arrayAvg([1, 2, 4]) AS res; +``` + +Result: + +```text +┌────────────────res─┐ +│ 2.3333333333333335 │ +└────────────────────┘ +``` + +Query: + +```sql +SELECT arrayAvg(x -> (x * x), [2, 4]) AS res; +``` + +Result: + +```text +┌─res─┐ +│ 10 │ +└─────┘ +``` + ## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} Returns an array of partial sums of elements in the source array (a running sum). If the `func` function is specified, then the values of the array elements are converted by this function before summing. 
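A short sketch of both forms (plain and with a lambda), analogous to the `arraySum` examples above:

```sql
SELECT arrayCumSum([1, 2, 3, 4]) AS res;              -- [1, 3, 6, 10]
SELECT arrayCumSum(x -> x * x, [1, 2, 3, 4]) AS res;  -- [1, 5, 14, 30]
```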
diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 57c2ae42ada..a3d0c82d8ab 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -35,7 +35,7 @@ Takes any integer and converts it into [binary form](https://en.wikipedia.org/wi SELECT bitTest(number, index) ``` -**Parameters** +**Arguments** - `number` – integer number. - `index` – position of bit. @@ -100,7 +100,7 @@ The conjuction for bitwise operations: SELECT bitTestAll(number, index1, index2, index3, index4, ...) ``` -**Parameters** +**Arguments** - `number` – integer number. - `index1`, `index2`, `index3`, `index4` – positions of bit. For example, for set of positions (`index1`, `index2`, `index3`, `index4`) is true if and only if all of its positions are true (`index1` ⋀ `index2`, ⋀ `index3` ⋀ `index4`). @@ -165,7 +165,7 @@ The disjunction for bitwise operations: SELECT bitTestAny(number, index1, index2, index3, index4, ...) ``` -**Parameters** +**Arguments** - `number` – integer number. - `index1`, `index2`, `index3`, `index4` – positions of bit. @@ -220,7 +220,7 @@ Calculates the number of bits set to one in the binary representation of a numbe bitCount(x) ``` -**Parameters** +**Arguments** - `x` — [Integer](../../sql-reference/data-types/int-uint.md) or [floating-point](../../sql-reference/data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index a66098beffb..bfff70576f2 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -21,7 +21,7 @@ Build a bitmap from unsigned integer array. bitmapBuild(array) ``` -**Parameters** +**Arguments** - `array` – unsigned integer array. @@ -45,7 +45,7 @@ Convert bitmap to integer array. bitmapToArray(bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -69,7 +69,7 @@ Return subset in specified range (not include the range_end). bitmapSubsetInRange(bitmap, range_start, range_end) ``` -**Parameters** +**Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). - `range_start` – range start point. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -97,7 +97,7 @@ Creates a subset of bitmap with n elements taken between `range_start` and `card bitmapSubsetLimit(bitmap, range_start, cardinality_limit) ``` -**Parameters** +**Arguments** - `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). - `range_start` – The subset starting point. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -133,7 +133,7 @@ Checks whether the bitmap contains an element. bitmapContains(haystack, needle) ``` -**Parameters** +**Arguments** - `haystack` – [Bitmap object](#bitmap_functions-bitmapbuild), where the function searches. - `needle` – Value that the function searches. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -167,7 +167,7 @@ bitmapHasAny(bitmap1, bitmap2) If you are sure that `bitmap2` contains strictly one element, consider using the [bitmapContains](#bitmap_functions-bitmapcontains) function. It works more efficiently. -**Parameters** +**Arguments** - `bitmap*` – bitmap object. @@ -197,7 +197,7 @@ If the second argument is an empty bitmap then returns 1. 
bitmapHasAll(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -221,7 +221,7 @@ Retrun bitmap cardinality of type UInt64. bitmapCardinality(bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -243,7 +243,7 @@ Retrun the smallest value of type UInt64 in the set, UINT32_MAX if the set is em bitmapMin(bitmap) -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -263,7 +263,7 @@ Retrun the greatest value of type UInt64 in the set, 0 if the set is empty. bitmapMax(bitmap) -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -283,7 +283,7 @@ Transform an array of values in a bitmap to another array of values, the result bitmapTransform(bitmap, from_array, to_array) -**Parameters** +**Arguments** - `bitmap` – bitmap object. - `from_array` – UInt32 array. For idx in range \[0, from_array.size()), if bitmap contains from_array\[idx\], then replace it with to_array\[idx\]. Note that the result depends on array ordering if there are common elements between from_array and to_array. @@ -307,7 +307,7 @@ Two bitmap and calculation, the result is a new bitmap. bitmapAnd(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -331,7 +331,7 @@ Two bitmap or calculation, the result is a new bitmap. bitmapOr(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -355,7 +355,7 @@ Two bitmap xor calculation, the result is a new bitmap. bitmapXor(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -379,7 +379,7 @@ Two bitmap andnot calculation, the result is a new bitmap. bitmapAndnot(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -403,7 +403,7 @@ Two bitmap and calculation, return cardinality of type UInt64. bitmapAndCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -427,7 +427,7 @@ Two bitmap or calculation, return cardinality of type UInt64. bitmapOrCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -451,7 +451,7 @@ Two bitmap xor calculation, return cardinality of type UInt64. bitmapXorCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. @@ -475,7 +475,7 @@ Two bitmap andnot calculation, return cardinality of type UInt64. bitmapAndnotCardinality(bitmap,bitmap) ``` -**Parameters** +**Arguments** - `bitmap` – bitmap object. diff --git a/docs/en/sql-reference/functions/conditional-functions.md b/docs/en/sql-reference/functions/conditional-functions.md index 446a4729ff2..2d57cbb3bd5 100644 --- a/docs/en/sql-reference/functions/conditional-functions.md +++ b/docs/en/sql-reference/functions/conditional-functions.md @@ -17,7 +17,7 @@ SELECT if(cond, then, else) If the condition `cond` evaluates to a non-zero value, returns the result of the expression `then`, and the result of the expression `else`, if present, is skipped. If the `cond` is zero or `NULL`, then the result of the `then` expression is skipped and the result of the `else` expression, if present, is returned. -**Parameters** +**Arguments** - `cond` – The condition for evaluation that can be zero or not. The type is UInt8, Nullable(UInt8) or NULL. - `then` - The expression to return if condition is met. 
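For instance, a minimal sketch using the `numbers` table function (purely illustrative):

```sql
SELECT number, if(number % 2 = 0, 'even', 'odd') AS parity
FROM numbers(3);
-- 0 -> 'even', 1 -> 'odd', 2 -> 'even'
```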
@@ -117,7 +117,7 @@ Allows you to write the [CASE](../../sql-reference/operators/index.md#operator_c Syntax: `multiIf(cond_1, then_1, cond_2, then_2, ..., else)` -**Parameters:** +**Arguments:** - `cond_N` — The condition for the function to return `then_N`. - `then_N` — The result of the function when executed. diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 9de780fb596..f26e1bee6c9 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -186,7 +186,7 @@ Truncates sub-seconds. toStartOfSecond(value[, timezone]) ``` -**Parameters** +**Arguments** - `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). - `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). @@ -328,7 +328,7 @@ For mode values with a meaning of “contains January 1”, the week contains Ja toWeek(date, [, mode][, Timezone]) ``` -**Parameters** +**Arguments** - `date` – Date or DateTime. - `mode` – Optional parameter, Range of values is \[0,9\], default is 0. @@ -378,9 +378,9 @@ date_trunc(unit, value[, timezone]) Alias: `dateTrunc`. -**Parameters** +**Arguments** -- `unit` — Part of date. [String](../syntax.md#syntax-string-literal). +- `unit` — The type of interval to truncate the result. [String Literal](../syntax.md#syntax-string-literal). Possible values: - `second` @@ -435,6 +435,201 @@ Result: - [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone) +## date\_add {#date_add} + +Adds specified date/time interval to the provided date. + +**Syntax** + +``` sql +date_add(unit, value, date) +``` + +Aliases: `dateAdd`, `DATE_ADD`. + +**Arguments** + +- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. +- `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md) +- `date` — [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). + + +**Returned value** + +Returns Date or DateTime with `value` expressed in `unit` added to `date`. + +**Example** + +```sql +select date_add(YEAR, 3, toDate('2018-01-01')); +``` + +```text +┌─plus(toDate('2018-01-01'), toIntervalYear(3))─┐ +│ 2021-01-01 │ +└───────────────────────────────────────────────┘ +``` + +## date\_diff {#date_diff} + +Returns the difference between two Date or DateTime values. + +**Syntax** + +``` sql +date_diff('unit', startdate, enddate, [timezone]) +``` + +Aliases: `dateDiff`, `DATE_DIFF`. + +**Arguments** + +- `unit` — The type of interval for result [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. + +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). + +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). + +- `timezone` — Optional parameter. If specified, it is applied to both `startdate` and `enddate`. 
If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. + +**Returned value** + +Difference between `enddate` and `startdate` expressed in `unit`. + +Type: `int`. + +**Example** + +Query: + +``` sql +SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); +``` + +Result: + +``` text +┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ +│ 25 │ +└────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +## date\_sub {#date_sub} + +Subtracts a time/date interval from the provided date. + +**Syntax** + +``` sql +date_sub(unit, value, date) +``` + +Aliases: `dateSub`, `DATE_SUB`. + +**Arguments** + +- `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. +- `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md) +- `date` — [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md) to subtract value from. + +**Returned value** + +Returns Date or DateTime with `value` expressed in `unit` subtracted from `date`. + +**Example** + +Query: + +``` sql +SELECT date_sub(YEAR, 3, toDate('2018-01-01')); +``` + +Result: + +``` text +┌─minus(toDate('2018-01-01'), toIntervalYear(3))─┐ +│ 2015-01-01 │ +└────────────────────────────────────────────────┘ +``` + +## timestamp\_add {#timestamp_add} + +Adds the specified time value with the provided date or date time value. + +**Syntax** + +``` sql +timestamp_add(date, INTERVAL value unit) +``` + +Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. + +**Arguments** + +- `date` — Date or Date with time - [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md) +- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. + +**Returned value** + +Returns Date or DateTime with the specified `value` expressed in `unit` added to `date`. + +**Example** + +```sql +select timestamp_add(toDate('2018-01-01'), INTERVAL 3 MONTH); +``` + +```text +┌─plus(toDate('2018-01-01'), toIntervalMonth(3))─┐ +│ 2018-04-01 │ +└────────────────────────────────────────────────┘ +``` + +## timestamp\_sub {#timestamp_sub} + +Returns the difference between two dates in the specified unit. + +**Syntax** + +``` sql +timestamp_sub(unit, value, date) +``` + +Aliases: `timeStampSub`, `TIMESTAMP_SUB`. + +**Arguments** + +- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). + + Supported values: second, minute, hour, day, week, month, quarter, year. +- `value` - Value in specified unit - [Int](../../sql-reference/data-types/int-uint.md). +- `date`- [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). + +**Returned value** + +Difference between `date` and the specified `value` expressed in `unit`. 
+ +**Example** + +```sql +select timestamp_sub(MONTH, 5, toDateTime('2018-12-18 01:02:03')); +``` + +```text +┌─minus(toDateTime('2018-12-18 01:02:03'), toIntervalMonth(5))─┐ +│ 2018-07-18 01:02:03 │ +└──────────────────────────────────────────────────────────────┘ +``` + ## now {#now} Returns the current date and time. @@ -445,7 +640,7 @@ Returns the current date and time. now([timezone]) ``` -**Parameters** +**Arguments** - `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). @@ -550,50 +745,6 @@ SELECT └──────────────────────────┴───────────────────────────────┘ ``` -## dateDiff {#datediff} - -Returns the difference between two Date or DateTime values. - -**Syntax** - -``` sql -dateDiff('unit', startdate, enddate, [timezone]) -``` - -**Parameters** - -- `unit` — Time unit, in which the returned value is expressed. [String](../../sql-reference/syntax.md#syntax-string-literal). - - Supported values: second, minute, hour, day, week, month, quarter, year. - -- `startdate` — The first time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). - -- `enddate` — The second time value to compare. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). - -- `timezone` — Optional parameter. If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. - -**Returned value** - -Difference between `startdate` and `enddate` expressed in `unit`. - -Type: `int`. - -**Example** - -Query: - -``` sql -SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); -``` - -Result: - -``` text -┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ -│ 25 │ -└────────────────────────────────────────────────────────────────────────────────────────┘ -``` - ## timeSlots(StartTime, Duration,\[, Size\]) {#timeslotsstarttime-duration-size} For a time interval starting at ‘StartTime’ and continuing for ‘Duration’ seconds, it returns an array of moments in time, consisting of points from this interval rounded down to the ‘Size’ in seconds. ‘Size’ is an optional parameter: a constant UInt32, set to 1800 by default. @@ -704,7 +855,7 @@ Converts a [Proleptic Gregorian calendar](https://en.wikipedia.org/wiki/Prolepti toModifiedJulianDay(date) ``` -**Parameters** +**Arguments** - `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). @@ -740,7 +891,7 @@ Similar to [toModifiedJulianDay()](#tomodifiedjulianday), but instead of raising toModifiedJulianDayOrNull(date) ``` -**Parameters** +**Arguments** - `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). @@ -776,7 +927,7 @@ Converts a [Modified Julian Day](https://en.wikipedia.org/wiki/Julian_day#Varian fromModifiedJulianDay(day) ``` -**Parameters** +**Arguments** - `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). 
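For reference alongside the `fromModifiedJulianDay` hunk above, a minimal usage sketch of the conversion being documented (Modified Julian Day 58849 corresponds to 2020-01-01; the query is illustrative and not part of this patch):

``` sql
SELECT fromModifiedJulianDay(58849);
```

``` text
┌─fromModifiedJulianDay(58849)─┐
│ 2020-01-01                   │
└──────────────────────────────┘
```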
@@ -812,7 +963,7 @@ Similar to [fromModifiedJulianDayOrNull()](#frommodifiedjuliandayornull), but in fromModifiedJulianDayOrNull(day) ``` -**Parameters** +**Arguments** - `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md index bc3f5ca4345..31e84c08b39 100644 --- a/docs/en/sql-reference/functions/encoding-functions.md +++ b/docs/en/sql-reference/functions/encoding-functions.md @@ -15,7 +15,7 @@ Returns the string with the length as the number of passed arguments and each by char(number_1, [number_2, ..., number_n]); ``` -**Parameters** +**Arguments** - `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md). @@ -107,7 +107,7 @@ For `String` and `FixedString`, all bytes are simply encoded as two hexadecimal Values of floating point and Decimal types are encoded as their representation in memory. As we support little endian architecture, they are encoded in little endian. Zero leading/trailing bytes are not omitted. -**Parameters** +**Arguments** - `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index bef2f8137d0..0dd7469b25e 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -11,7 +11,7 @@ Key length depends on encryption mode. It is 16, 24, and 32 bytes long for `-128 Initialization vector length is always 16 bytes (bytes in excess of 16 are ignored). -Note that these functions work slowly. +Note that these functions work slowly until ClickHouse 21.1. ## encrypt {#encrypt} @@ -31,7 +31,7 @@ This function encrypts data using these modes: encrypt('mode', 'plaintext', 'key' [, iv, aad]) ``` -**Parameters** +**Arguments** - `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). - `plaintext` — Text thats need to be encrypted. [String](../../sql-reference/data-types/string.md#string). @@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad]) **Returned value** -- Ciphered String. [String](../../sql-reference/data-types/string.md#string). +- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string). 
**Examples** @@ -52,57 +52,38 @@ Query: ``` sql CREATE TABLE encryption_test ( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; + `comment` String, + `secret` String +) +ENGINE = Memory ``` -Insert this data: +Insert some data (please avoid storing the keys/ivs in the database as this undermines the whole concept of encryption), also storing 'hints' is unsafe too and used only for illustrative purposes: Query: ``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\ +('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\ +('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\ +('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212')); ``` -Example without `iv`: - Query: ``` sql -SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test; +SELECT comment, hex(secret) FROM encryption_test; ``` Result: ``` text -┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐ -│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │ -│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │ -│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │ -└─────────────┴──────────────────────────────────────────────────────────────────┘ -``` - -Example with `iv`: - -Query: - -``` sql -SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test; -``` - -Result: - -``` text -┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐ -│ aes-256-ctr │ │ -│ aes-256-ctr │ 7FB039F7 │ -│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │ -└─────────────┴───────────────────────────────────────────────┘ +┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐ +│ aes-256-cfb128 no IV │ B4972BDC4459 │ +│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │ +│ aes-256-cfb128 with IV │ 5E6CB398F653 │ +│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │ +└─────────────────────────────────────┴──────────────────────────────────┘ ``` Example with `-gcm`: @@ -110,40 +91,26 @@ Example with `-gcm`: Query: ``` sql -SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test; +INSERT INTO encryption_test VALUES('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')), \ +('aes-256-gcm with AAD', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv', 'aad')); + +SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%'; ``` Result: ``` text -┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐ -│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │ -│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │ -│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │ -└─────────────┴────────────────────────────────────────────────────────────────────────┘ 
-```
-
-Example with `-gcm` mode and with `aad`:
-
-Query:
-
-``` sql
-SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
-```
-
-Result:
-
-``` text
-┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
-│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB                                        │
-│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447                                │
-│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA  │
-└─────────────┴────────────────────────────────────────────────────────────────────────┘
+┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
+│ aes-256-gcm          │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
+│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
+└──────────────────────┴──────────────────────────────────────────────┘
 ```
 
 ## aes_encrypt_mysql {#aes_encrypt_mysql}
 
-Compatible with mysql encryption and can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
+Compatible with MySQL encryption; the resulting ciphertext can be decrypted with the [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
+
+Produces the same ciphertext as `encrypt` for equal inputs. However, when `key` or `iv` is longer than it should normally be, `aes_encrypt_mysql` sticks to what MySQL's `aes_encrypt` does: it 'folds' the `key` and ignores the excess bits of the `iv`.
 
 Supported encryption modes:
 
@@ -156,86 +123,106 @@ Supported encryption modes:
 
 **Syntax**
 
-```sql
+``` sql
 aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
 ```
 
-**Parameters**
+**Arguments**
 
 - `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
 - `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
-- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
-- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Encryption key. If the key is longer than required by the mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Optional. Only the first 16 bytes are taken into account. [String](../../sql-reference/data-types/string.md#string).
 
 **Returned value**
 
-- Ciphered String. [String](../../sql-reference/data-types/string.md#string).
+- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
+ **Examples** -Create this table: +Given equal input `encrypt` and `aes_encrypt_mysql` produce the same ciphertext: Query: ``` sql -CREATE TABLE encryption_test -( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; +SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal; ``` -Insert this data: +Result: -Query: - -``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +``` +┌─ciphertexts_equal─┐ +│ 1 │ +└───────────────────┘ ``` -Example without `iv`: + +But `encrypt` fails when `key` or `iv` is longer than expected: Query: ``` sql -SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test; +SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123'); ``` Result: ``` text -┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐ -│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │ -│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │ -│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │ -└─────────────┴──────────────────────────────────────────────────────────────────┘ +Received exception from server (version 21.1.2): +Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123'). 
```
 
-Example with `iv`:
+While `aes_encrypt_mysql` produces MySQL-compatible output:
 
 Query:
 
 ``` sql
-SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
+SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
+```
+
+Result:
+
+```text
+┌─ciphertext───┐
+│ 24E9E4966469 │
+└──────────────┘
+```
+
+Notice how supplying an even longer `IV` produces the same result:
+
+Query:
+
+``` sql
+SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
 ```
 
 Result:
 
 ``` text
-┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
-│ aes-256-cfb128 │                                                             │
-│ aes-256-cfb128 │ 7FB039F7                                                    │
-│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F                      │
-└────────────────┴─────────────────────────────────────────────────────────────┘
+┌─ciphertext───┐
+│ 24E9E4966469 │
+└──────────────┘
+```
+
+The result is binary-equal to what MySQL produces for the same inputs:
+
+``` sql
+mysql> SET block_encryption_mode='aes-256-cfb128';
+Query OK, 0 rows affected (0.00 sec)
+
+mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
++------------------------+
+| ciphertext             |
++------------------------+
+| 0x24E9E4966469         |
++------------------------+
+1 row in set (0.00 sec)
 ```
 
 ## decrypt {#decrypt}
 
-This function decrypts data using these modes:
+This function decrypts ciphertext into plaintext using these modes:
 
 - aes-128-ecb, aes-192-ecb, aes-256-ecb
 - aes-128-cbc, aes-192-cbc, aes-256-cbc
@@ -247,11 +234,11 @@ This function decrypts data using these modes:
 
 **Syntax**
 
-```sql
+``` sql
 decrypt('mode', 'ciphertext', 'key' [, iv, aad])
 ```
 
-**Parameters**
+**Arguments**
 
 - `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
 - `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
│ -└─────────────┴─────────────────────────────────────────────────────────────────────┘ +``` text +┌─comment──────────────┬─hex(secret)──────────────────────────────────┐ +│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │ +│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │ +└──────────────────────┴──────────────────────────────────────────────┘ +┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐ +│ aes-256-cfb128 no IV │ B4972BDC4459 │ +│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │ +│ aes-256-cfb128 with IV │ 5E6CB398F653 │ +│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │ +└─────────────────────────────────────┴──────────────────────────────────┘ ``` +Now let's try to decrypt all that data. + +Query: + +``` sql +SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test +``` + +Result: +``` text +┌─comment─────────────────────────────┬─plaintext─┐ +│ aes-256-cfb128 no IV │ Secret │ +│ aes-256-cfb128 no IV, different key │ �4� + � │ +│ aes-256-cfb128 with IV │ ���6�~ │ + │aes-256-cbc no IV │ �2*4�h3c�4w��@ +└─────────────────────────────────────┴───────────┘ +``` + +Notice how only portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption. + ## aes_decrypt_mysql {#aes_decrypt_mysql} Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function. +Will produce same plaintext as `decrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_decrypt_mysql` will stick to what MySQL's `aes_decrypt` does: 'fold' `key` and ignore excess bits of `IV`. + Supported decryption modes: - aes-128-ecb, aes-192-ecb, aes-256-ecb @@ -321,11 +313,11 @@ Supported decryption modes: **Syntax** -```sql +``` sql aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) ``` -**Parameters** +**Arguments** - `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string). - `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string). 
@@ -338,44 +330,30 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv]) **Examples** -Create this table: - -Query: - +Let's decrypt data we've previously encrypted with MySQL: ``` sql -CREATE TABLE encryption_test -( - input String, - key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'), - iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'), - key32 String DEFAULT substring(key, 1, 32), - key24 String DEFAULT substring(key, 1, 24), - key16 String DEFAULT substring(key, 1, 16) -) Engine = Memory; -``` +mysql> SET block_encryption_mode='aes-256-cfb128'; +Query OK, 0 rows affected (0.00 sec) -Insert this data: - -Query: - -``` sql -INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?'); +mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext; ++------------------------+ +| ciphertext | ++------------------------+ +| 0x24E9E4966469 | ++------------------------+ +1 row in set (0.00 sec) ``` Query: - ``` sql -SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test; +SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext ``` Result: - ``` text -┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐ -│ aes-128-cbc │ │ -│ aes-128-cbc │ text │ -│ aes-128-cbc │ What Is ClickHouse? │ -└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘ +┌─plaintext─┐ +│ Secret │ +└───────────┘ ``` [Original article](https://clickhouse.tech/docs/en/sql-reference/functions/encryption_functions/) diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md index 7df6ef54f2a..834fcdf8282 100644 --- a/docs/en/sql-reference/functions/ext-dict-functions.md +++ b/docs/en/sql-reference/functions/ext-dict-functions.md @@ -19,7 +19,7 @@ dictGet('dict_name', 'attr_name', id_expr) dictGetOrDefault('dict_name', 'attr_name', id_expr, default_value_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). @@ -108,7 +108,7 @@ Checks whether a key is present in a dictionary. dictHas('dict_name', id_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration. @@ -130,7 +130,7 @@ Creates an array, containing all the parents of a key in the [hierarchical dicti dictGetHierarchy('dict_name', key) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. 
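As a sketch of how the `dictGetHierarchy` call documented above is typically used — `regions_hierarchical` is a hypothetical dictionary name, and the returned chain depends entirely on the dictionary contents:

``` sql
-- Hypothetical hierarchical dictionary; the result is an array of keys
-- walking from the given key up through its parents.
SELECT dictGetHierarchy('regions_hierarchical', toUInt64(5)) AS chain;
```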
@@ -149,7 +149,7 @@ Checks the ancestor of a key through the whole hierarchical chain in the diction dictIsIn('dict_name', child_id_expr, ancestor_id_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. @@ -185,7 +185,7 @@ dictGet[Type]('dict_name', 'attr_name', id_expr) dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) ``` -**Parameters** +**Arguments** - `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). - `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index c32af7194fb..df75e96c8fb 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -13,7 +13,7 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal isNull(x) ``` -**Parameters** +**Arguments** - `x` — A value with a non-compound data type. @@ -53,7 +53,7 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal isNotNull(x) ``` -**Parameters:** +**Arguments:** - `x` — A value with a non-compound data type. @@ -93,7 +93,7 @@ Checks from left to right whether `NULL` arguments were passed and returns the f coalesce(x,...) ``` -**Parameters:** +**Arguments:** - Any number of parameters of a non-compound type. All parameters must be compatible by data type. @@ -136,7 +136,7 @@ Returns an alternative value if the main argument is `NULL`. ifNull(x,alt) ``` -**Parameters:** +**Arguments:** - `x` — The value to check for `NULL`. - `alt` — The value that the function returns if `x` is `NULL`. @@ -176,7 +176,7 @@ Returns `NULL` if the arguments are equal. nullIf(x, y) ``` -**Parameters:** +**Arguments:** `x`, `y` — Values for comparison. They must be compatible types, or ClickHouse will generate an exception. @@ -215,7 +215,7 @@ Results in a value of type [Nullable](../../sql-reference/data-types/nullable.md assumeNotNull(x) ``` -**Parameters:** +**Arguments:** - `x` — The original value. @@ -277,7 +277,7 @@ Converts the argument type to `Nullable`. toNullable(x) ``` -**Parameters:** +**Arguments:** - `x` — The value of any non-compound type. diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index 6f288a7687d..c27eab0b421 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -72,7 +72,7 @@ Returns an array of [geohash](#geohash)-encoded strings of given precision that geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precision) ``` -**Parameters** +**Arguments** - `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). - `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). 
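To make the `geohashesInBox` signature above concrete, a usage sketch (the coordinates are arbitrary; the exact set of geohash strings depends on the box and precision):

``` sql
-- Returns an Array(String) of precision-4 geohash cells covering the box.
SELECT geohashesInBox(24.48, 40.56, 24.785, 40.81, 4) AS cells;
```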
diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index 4ed651e4e9e..9dda947b3a7 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -162,7 +162,7 @@ Returns [H3](#h3index) point index `(lon, lat)` with specified resolution. geoToH3(lon, lat, resolution) ``` -**Parameters** +**Arguments** - `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). - `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -201,7 +201,7 @@ Result: h3kRing(h3index, k) ``` -**Parameters** +**Arguments** - `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `k` — Raduis. Type: [integer](../../../sql-reference/data-types/int-uint.md) @@ -315,7 +315,7 @@ Returns whether or not the provided [H3](#h3index) indexes are neighbors. h3IndexesAreNeighbors(index1, index2) ``` -**Parameters** +**Arguments** - `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -353,7 +353,7 @@ Returns an array of child indexes for the given [H3](#h3index) index. h3ToChildren(index, resolution) ``` -**Parameters** +**Arguments** - `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -390,7 +390,7 @@ Returns the parent (coarser) index containing the given [H3](#h3index) index. h3ToParent(index, resolution) ``` -**Parameters** +**Arguments** - `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 9394426b20b..6bf1bebabaa 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -9,7 +9,7 @@ Hash functions can be used for the deterministic pseudo-random shuffling of elem ## halfMD5 {#hash-functions-halfmd5} -[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. +[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. ``` sql halfMD5(par1, ...) @@ -18,9 +18,9 @@ halfMD5(par1, ...) The function is relatively slow (5 million short strings per second per processor core). Consider using the [sipHash64](#hash_functions-siphash64) function instead. -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). 
+The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -54,16 +54,16 @@ sipHash64(par1,...) This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) function. -Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm: +Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm: 1. After hashing all the input parameters, the function gets the array of hashes. 2. Function takes the first and the second elements and calculates a hash for the array of them. 3. Then the function takes the hash value, calculated at the previous step, and the third element of the initial hash array, and calculates a hash for the array of them. 4. The previous step is repeated for all the remaining elements of the initial hash array. -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -97,9 +97,9 @@ cityHash64(par1,...) This is a fast non-cryptographic hash function. It uses the CityHash algorithm for string parameters and implementation-specific fast non-cryptographic hash function for parameters with other data types. The function uses the CityHash combinator to get the final results. -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -166,9 +166,9 @@ farmHash64(par1, ...) These functions use the `Fingerprint64` and `Hash64` methods respectively from all [available methods](https://github.com/google/farmhash/blob/master/src/farmhash.h). -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -226,7 +226,7 @@ Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add97 javaHashUTF16LE(stringUtf16le) ``` -**Parameters** +**Arguments** - `stringUtf16le` — a string in UTF-16LE encoding. @@ -292,9 +292,9 @@ Produces a 64-bit [MetroHash](http://www.jandrewrogers.com/2015/05/27/metrohash/ metroHash64(par1, ...) ``` -**Parameters** +**Arguments** -The function takes a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +The function takes a variable number of input parameters. 
Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -327,9 +327,9 @@ murmurHash2_32(par1, ...) murmurHash2_64(par1, ...) ``` -**Parameters** +**Arguments** -Both functions take a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -358,7 +358,7 @@ Calculates a 64-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash val gccMurmurHash(par1, ...); ``` -**Parameters** +**Arguments** - `par1, ...` — A variable number of parameters that can be any of the [supported data types](../../sql-reference/data-types/index.md#data_types). @@ -395,9 +395,9 @@ murmurHash3_32(par1, ...) murmurHash3_64(par1, ...) ``` -**Parameters** +**Arguments** -Both functions take a variable number of input parameters. Parameters can be any of the [supported data types](../../sql-reference/data-types/index.md). +Both functions take a variable number of input parameters. Arguments can be any of the [supported data types](../../sql-reference/data-types/index.md). **Returned Value** @@ -424,7 +424,7 @@ Produces a 128-bit [MurmurHash3](https://github.com/aappleby/smhasher) hash valu murmurHash3_128( expr ) ``` -**Parameters** +**Arguments** - `expr` — [Expressions](../../sql-reference/syntax.md#syntax-expressions) returning a [String](../../sql-reference/data-types/string.md)-type value. diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index bfa1998d68a..964265a461b 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -32,7 +32,7 @@ If you use official ClickHouse packages, you need to install the `clickhouse-com addressToLine(address_of_binary_instruction) ``` -**Parameters** +**Arguments** - `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. @@ -123,7 +123,7 @@ Converts virtual memory address inside ClickHouse server process to the symbol f addressToSymbol(address_of_binary_instruction) ``` -**Parameters** +**Arguments** - `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process. @@ -220,7 +220,7 @@ Converts a symbol that you can get using the [addressToSymbol](#addresstosymbol) demangle(symbol) ``` -**Parameters** +**Arguments** - `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol from an object file. @@ -345,7 +345,7 @@ Emits trace log message to server log for each [Block](https://clickhouse.tech/d logTrace('message') ``` -**Parameters** +**Arguments** - `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string). diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md index faf551601ac..eaea5e250fb 100644 --- a/docs/en/sql-reference/functions/ip-address-functions.md +++ b/docs/en/sql-reference/functions/ip-address-functions.md @@ -115,9 +115,20 @@ LIMIT 10 ## IPv6StringToNum(s) {#ipv6stringtonums} -The reverse function of IPv6NumToString. If the IPv6 address has an invalid format, it returns a string of null bytes. +The reverse function of IPv6NumToString. 
If the IPv6 address has an invalid format, it returns a string of null bytes. +If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned. HEX can be uppercase or lowercase. +``` sql +SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0); +``` + +``` text +┌─cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0)─┐ +│ ::ffff:127.0.0.1 │ +└─────────────────────────────────────────────┘ +``` + ## IPv4ToIPv6(x) {#ipv4toipv6x} Takes a `UInt32` number. Interprets it as an IPv4 address in [big endian](https://en.wikipedia.org/wiki/Endianness). Returns a `FixedString(16)` value containing the IPv6 address in binary format. Examples: @@ -214,6 +225,7 @@ SELECT ## toIPv6(string) {#toipv6string} An alias to `IPv6StringToNum()` that takes a string form of IPv6 address and returns value of [IPv6](../../sql-reference/data-types/domains/ipv6.md) type, which is binary equal to value returned by `IPv6StringToNum()`. +If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned. ``` sql WITH @@ -243,33 +255,91 @@ SELECT └───────────────────────────────────┴──────────────────────────────────┘ ``` - -## isIPv4String - -Determines if the input string is an IPv4 address or not. Returns `1` if true `0` otherwise. - ``` sql -SELECT isIPv4String('127.0.0.1') +SELECT toIPv6('127.0.0.1') ``` ``` text -┌─isIPv4String('127.0.0.1')─┐ -│ 1 │ -└───────────────────────────┘ +┌─toIPv6('127.0.0.1')─┐ +│ ::ffff:127.0.0.1 │ +└─────────────────────┘ ``` -## isIPv6String +## isIPv4String {#isipv4string} -Determines if the input string is an IPv6 address or not. Returns `1` if true `0` otherwise. +Determines whether the input string is an IPv4 address or not. If `string` is IPv6 address returns `0`. -``` sql -SELECT isIPv6String('2001:438:ffff::407d:1bc1') +**Syntax** + +```sql +isIPv4String(string) ``` +**Arguments** + +- `string` — IP address. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- `1` if `string` is IPv4 address, `0` otherwise. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Examples** + +Query: + +```sql +SELECT addr, isIPv4String(addr) FROM ( SELECT ['0.0.0.0', '127.0.0.1', '::ffff:127.0.0.1'] AS addr ) ARRAY JOIN addr +``` + +Result: + ``` text -┌─isIPv6String('2001:438:ffff::407d:1bc1')─┐ -│ 1 │ -└──────────────────────────────────────────┘ +┌─addr─────────────┬─isIPv4String(addr)─┐ +│ 0.0.0.0 │ 1 │ +│ 127.0.0.1 │ 1 │ +│ ::ffff:127.0.0.1 │ 0 │ +└──────────────────┴────────────────────┘ +``` + +## isIPv6String {#isipv6string} + +Determines whether the input string is an IPv6 address or not. If `string` is IPv4 address returns `0`. + +**Syntax** + +```sql +isIPv6String(string) +``` + +**Arguments** + +- `string` — IP address. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- `1` if `string` is IPv6 address, `0` otherwise. + +Type: [UInt8](../../sql-reference/data-types/int-uint.md). 
+ +**Examples** + +Query: + +``` sql +SELECT addr, isIPv6String(addr) FROM ( SELECT ['::', '1111::ffff', '::ffff:127.0.0.1', '127.0.0.1'] AS addr ) ARRAY JOIN addr +``` + +Result: + +``` text +┌─addr─────────────┬─isIPv6String(addr)─┐ +│ :: │ 1 │ +│ 1111::ffff │ 1 │ +│ ::ffff:127.0.0.1 │ 1 │ +│ 127.0.0.1 │ 0 │ +└──────────────────┴────────────────────┘ ``` [Original article](https://clickhouse.tech/docs/en/query_language/functions/ip_address_functions/) diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 05e755eaddc..edee048eb77 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -236,7 +236,7 @@ Extracts raw data from a JSON object. JSONExtractKeysAndValuesRaw(json[, p, a, t, h]) ``` -**Parameters** +**Arguments** - `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. - `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. diff --git a/docs/en/sql-reference/functions/machine-learning-functions.md b/docs/en/sql-reference/functions/machine-learning-functions.md index 8627fc26bad..f103a4ea421 100644 --- a/docs/en/sql-reference/functions/machine-learning-functions.md +++ b/docs/en/sql-reference/functions/machine-learning-functions.md @@ -27,7 +27,7 @@ Compares test groups (variants) and calculates for each group the probability to bayesAB(distribution_name, higher_is_better, variant_names, x, y) ``` -**Parameters** +**Arguments** - `distribution_name` — Name of the probability distribution. [String](../../sql-reference/data-types/string.md). Possible values: diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md index 8dc287593c7..f56a721c0c0 100644 --- a/docs/en/sql-reference/functions/math-functions.md +++ b/docs/en/sql-reference/functions/math-functions.md @@ -121,7 +121,7 @@ Accepts a numeric argument and returns a UInt64 number close to 10 to the power cosh(x) ``` -**Parameters** +**Arguments** - `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -157,7 +157,7 @@ Result: acosh(x) ``` -**Parameters** +**Arguments** - `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -197,7 +197,7 @@ Result: sinh(x) ``` -**Parameters** +**Arguments** - `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -233,7 +233,7 @@ Result: asinh(x) ``` -**Parameters** +**Arguments** - `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -273,7 +273,7 @@ Result: atanh(x) ``` -**Parameters** +**Arguments** - `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [Float64](../../sql-reference/data-types/float.md#float32-float64). 
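For the `atanh` argument range noted above, a quick sanity-check query (the result is approximate, subject to floating-point rounding):

``` sql
-- atanh(0.9) ≈ 1.4722, i.e. the value whose hyperbolic tangent is 0.9.
SELECT atanh(0.9);
```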
@@ -309,7 +309,7 @@ The [function](https://en.wikipedia.org/wiki/Atan2) calculates the angle in the atan2(y, x) ``` -**Parameters** +**Arguments** - `y` — y-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). - `x` — x-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -346,7 +346,7 @@ Calculates the length of the hypotenuse of a right-angle triangle. The [function hypot(x, y) ``` -**Parameters** +**Arguments** - `x` — The first cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). - `y` — The second cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -383,7 +383,7 @@ Calculates `log(1+x)`. The [function](https://en.wikipedia.org/wiki/Natural_loga log1p(x) ``` -**Parameters** +**Arguments** - `x` — Values from the interval: `-1 < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). @@ -423,7 +423,7 @@ The `sign` function can extract the sign of a real number. sign(x) ``` -**Parameters** +**Arguments** - `x` — Values from `-∞` to `+∞`. Support all numeric types in ClickHouse. diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index 08d34770f57..04e921b5c55 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -19,7 +19,7 @@ Gets a named value from the [macros](../../operations/server-configuration-param getMacro(name); ``` -**Parameters** +**Arguments** - `name` — Name to retrieve from the `macros` section. [String](../../sql-reference/data-types/string.md#string). @@ -108,7 +108,7 @@ Extracts the trailing part of a string after the last slash or backslash. This f basename( expr ) ``` -**Parameters** +**Arguments** - `expr` — Expression resulting in a [String](../../sql-reference/data-types/string.md) type value. All the backslashes must be escaped in the resulting value. @@ -182,13 +182,102 @@ If `NULL` is passed to the function as input, then it returns the `Nullable(Noth Gets the size of the block. In ClickHouse, queries are always run on blocks (sets of column parts). This function allows getting the size of the block that you called it for. -## byteSize(...) {#function-bytesize} +## byteSize {#function-bytesize} -Get an estimate of uncompressed byte size of its arguments in memory. -E.g. for UInt32 argument it will return constant 4, for String argument - the string length + 9 (terminating zero + length). -The function can take multiple arguments. The typical application is byteSize(*). +Returns estimation of uncompressed byte size of its arguments in memory. -Use case: Suppose you have a service that stores data for multiple clients in one table. Users will pay per data volume. So, you need to implement accounting of users data volume. The function will allow to calculate the data size on per-row basis. +**Syntax** + +```sql +byteSize(argument [, ...]) +``` + +**Arguments** + +- `argument` — Value. + +**Returned value** + +- Estimation of byte size of the arguments in memory. + +Type: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Examples** + +For [String](../../sql-reference/data-types/string.md) arguments the funtion returns the string length + 9 (terminating zero + length). 
+ +Query: + +```sql +SELECT byteSize('string'); +``` + +Result: + +```text +┌─byteSize('string')─┐ +│ 15 │ +└────────────────────┘ +``` + +Query: + +```sql +CREATE TABLE test +( + `key` Int32, + `u8` UInt8, + `u16` UInt16, + `u32` UInt32, + `u64` UInt64, + `i8` Int8, + `i16` Int16, + `i32` Int32, + `i64` Int64, + `f32` Float32, + `f64` Float64 +) +ENGINE = MergeTree +ORDER BY key; + +INSERT INTO test VALUES(1, 8, 16, 32, 64, -8, -16, -32, -64, 32.32, 64.64); + +SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16)`, byteSize(u32) AS `byteSize(UInt32)`, byteSize(u64) AS `byteSize(UInt64)`, byteSize(i8) AS `byteSize(Int8)`, byteSize(i16) AS `byteSize(Int16)`, byteSize(i32) AS `byteSize(Int32)`, byteSize(i64) AS `byteSize(Int64)`, byteSize(f32) AS `byteSize(Float32)`, byteSize(f64) AS `byteSize(Float64)` FROM test ORDER BY key ASC FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +key: 1 +byteSize(UInt8): 1 +byteSize(UInt16): 2 +byteSize(UInt32): 4 +byteSize(UInt64): 8 +byteSize(Int8): 1 +byteSize(Int16): 2 +byteSize(Int32): 4 +byteSize(Int64): 8 +byteSize(Float32): 4 +byteSize(Float64): 8 +``` + +If the function takes multiple arguments, it returns their combined byte size. + +Query: + +```sql +SELECT byteSize(NULL, 1, 0.3, ''); +``` + +Result: + +```text +┌─byteSize(NULL, 1, 0.3, '')─┐ +│ 19 │ +└────────────────────────────┘ +``` ## materialize(x) {#materializex} @@ -260,7 +349,7 @@ The function is intended for development, debugging and demonstration. isConstant(x) ``` -**Parameters** +**Arguments** - `x` — Expression to check. @@ -331,7 +420,7 @@ Checks whether floating point value is finite. ifNotFinite(x,y) -**Parameters** +**Arguments** - `x` — Value to be checked for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). - `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). @@ -371,7 +460,7 @@ Allows building a unicode-art diagram. `bar(x, min, max, width)` draws a band with a width proportional to `(x - min)` and equal to `width` characters when `x = max`. -Parameters: +**Arguments** - `x` — Size to display. - `min, max` — Integer constants. The value must fit in `Int64`. @@ -556,7 +645,7 @@ Accepts the time delta in seconds. Returns a time delta with (year, month, day, formatReadableTimeDelta(column[, maximum_unit]) ``` -**Parameters** +**Arguments** - `column` — A column with numeric time delta. - `maximum_unit` — Optional. Maximum unit to show. Acceptable values seconds, minutes, hours, days, months, years. @@ -641,7 +730,7 @@ The result of the function depends on the affected data blocks and the order of The rows order used during the calculation of `neighbor` can differ from the order of rows returned to the user. To prevent that you can make a subquery with ORDER BY and call the function from outside the subquery. -**Parameters** +**Arguments** - `column` — A column name or scalar expression. - `offset` — The number of rows forwards or backwards from the current row of `column`. [Int64](../../sql-reference/data-types/int-uint.md). @@ -820,6 +909,66 @@ WHERE diff != 1 Same as for [runningDifference](../../sql-reference/functions/other-functions.md#other_functions-runningdifference), the difference is the value of the first row, returned the value of the first row, and each subsequent row returns the difference from the previous row. 
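To make the `runningDifferenceStartingWithFirstValue` behaviour described above concrete — the first row returns its own value, each subsequent row returns the difference from the previous row — here is a self-contained sketch (the values are arbitrary; as with `runningDifference`, it assumes the rows fit in a single data block):

``` sql
SELECT runningDifferenceStartingWithFirstValue(x) AS diff
FROM (SELECT arrayJoin([2, 4, 7]) AS x);
```

``` text
┌─diff─┐
│    2 │
│    2 │
│    3 │
└──────┘
```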
+## runningConcurrency {#runningconcurrency} + +Given a series of beginning time and ending time of events, this function calculates concurrency of the events at each of the data point, that is, the beginning time. + +!!! warning "Warning" + Events spanning multiple data blocks will not be processed correctly. The function resets its state for each new data block. + +The result of the function depends on the order of data in the block. It assumes the beginning time is sorted in ascending order. + +**Syntax** + +``` sql +runningConcurrency(begin, end) +``` + +**Arguments** + +- `begin` — A column for the beginning time of events (inclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `end` — A column for the ending time of events (exclusive). [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). + +Note that two columns `begin` and `end` must have the same type. + +**Returned values** + +- The concurrency of events at the data point. + +Type: [UInt32](../../sql-reference/data-types/int-uint.md) + +**Example** + +Input table: + +``` text +┌───────────────begin─┬─────────────────end─┐ +│ 2020-12-01 00:00:00 │ 2020-12-01 00:59:59 │ +│ 2020-12-01 00:30:00 │ 2020-12-01 00:59:59 │ +│ 2020-12-01 00:40:00 │ 2020-12-01 01:30:30 │ +│ 2020-12-01 01:10:00 │ 2020-12-01 01:30:30 │ +│ 2020-12-01 01:50:00 │ 2020-12-01 01:59:59 │ +└─────────────────────┴─────────────────────┘ +``` + +Query: + +``` sql +SELECT runningConcurrency(begin, end) FROM example +``` + +Result: + +``` text +┌─runningConcurrency(begin, end)─┐ +│ 1 │ +│ 2 │ +│ 3 │ +│ 2 │ +│ 1 │ +└────────────────────────────────┘ +``` + ## MACNumToString(num) {#macnumtostringnum} Accepts a UInt64 number. Interprets it as a MAC address in big endian. Returns a string containing the corresponding MAC address in the format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form). @@ -840,7 +989,7 @@ Returns the number of fields in [Enum](../../sql-reference/data-types/enum.md). getSizeOfEnumType(value) ``` -**Parameters:** +**Arguments:** - `value` — Value of type `Enum`. @@ -869,7 +1018,7 @@ Returns size on disk (without taking into account compression). blockSerializedSize(value[, value[, ...]]) ``` -**Parameters** +**Arguments** - `value` — Any value. @@ -901,7 +1050,7 @@ Returns the name of the class that represents the data type of the column in RAM toColumnTypeName(value) ``` -**Parameters:** +**Arguments:** - `value` — Any type of value. @@ -941,7 +1090,7 @@ Outputs a detailed description of data structures in RAM dumpColumnStructure(value) ``` -**Parameters:** +**Arguments:** - `value` — Any type of value. @@ -971,7 +1120,7 @@ Does not include default values for custom columns set by the user. defaultValueOfArgumentType(expression) ``` -**Parameters:** +**Arguments:** - `expression` — Arbitrary type of value or an expression that results in a value of an arbitrary type. @@ -1013,7 +1162,7 @@ Does not include default values for custom columns set by the user. defaultValueOfTypeName(type) ``` -**Parameters:** +**Arguments:** - `type` — A string representing a type name. @@ -1055,7 +1204,7 @@ Used for internal implementation of [arrayJoin](../../sql-reference/functions/ar SELECT replicate(x, arr); ``` -**Parameters:** +**Arguments:** - `arr` — Original array. 
ClickHouse creates a new array of the same length as the original and fills it with the value `x`. - `x` — The value that the resulting array will be filled with. @@ -1188,7 +1337,7 @@ Takes state of aggregate function. Returns result of aggregation (or finalized s finalizeAggregation(state) ``` -**Parameters** +**Arguments** - `state` — State of aggregation. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). @@ -1292,7 +1441,7 @@ Accumulates states of an aggregate function for each row of a data block. runningAccumulate(agg_state[, grouping]); ``` -**Parameters** +**Arguments** - `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). - `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined. @@ -1398,7 +1547,7 @@ Only supports tables created with the `ENGINE = Join(ANY, LEFT, )` st joinGet(join_storage_table_name, `value_column`, join_keys) ``` -**Parameters** +**Arguments** - `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicates where search is performed. The identifier is searched in the default database (see parameter `default_database` in the config file). To override the default database, use the `USE db_name` or specify the database and the table through the separator `db_name.db_table`, see the example. - `value_column` — name of the column of the table that contains required data. @@ -1502,7 +1651,7 @@ Generates a string with a random set of [ASCII](https://en.wikipedia.org/wiki/AS randomPrintableASCII(length) ``` -**Parameters** +**Arguments** - `length` — Resulting string length. Positive integer. @@ -1538,7 +1687,7 @@ Generates a binary string of the specified length filled with random bytes (incl randomString(length) ``` -**Parameters** +**Arguments** - `length` — String length. Positive integer. @@ -1586,7 +1735,7 @@ Generates a binary string of the specified length filled with random bytes (incl randomFixedString(length); ``` -**Parameters** +**Arguments** - `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1624,7 +1773,7 @@ Generates a random string of a specified length. Result string contains valid UT randomStringUTF8(length); ``` -**Parameters** +**Arguments** - `length` — Required length of the resulting string in code points. [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1696,7 +1845,7 @@ Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is isDecimalOverflow(d, [p]) ``` -**Parameters** +**Arguments** - `d` — value. [Decimal](../../sql-reference/data-types/decimal.md). - `p` — precision. Optional. If omitted, the initial precision of the first argument is used. Using of this paratemer could be helpful for data extraction to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). @@ -1733,7 +1882,7 @@ Returns number of decimal digits you need to represent the value. countDigits(x) ``` -**Parameters** +**Arguments** - `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value. @@ -1792,7 +1941,7 @@ Returns [native interface](../../interfaces/tcp.md) TCP port number listened by tcpPort() ``` -**Parameters** +**Arguments** - None. 
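For the `tcpPort` hunk above, a trivial usage sketch — the value returned depends on the server configuration; 9000 is only the common default:

``` sql
SELECT tcpPort();
```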
diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index 68998928398..2b9846344e4 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -32,7 +32,7 @@ Produces a constant column with a random value. randConstant([x]) ``` -**Parameters** +**Arguments** - `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. @@ -81,7 +81,7 @@ fuzzBits([s], [prob]) Inverts bits of `s`, each with probability `prob`. -**Parameters** +**Arguments** - `s` - `String` or `FixedString` - `prob` - constant `Float32/64` diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index 922cf7374d7..83db1975366 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -35,7 +35,7 @@ The function returns the nearest number of the specified order. In case when giv round(expression [, decimal_places]) ``` -**Parameters:** +**Arguments:** - `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). - `decimal-places` — An integer value. @@ -114,7 +114,7 @@ For example, sum numbers 1.5, 2.5, 3.5, 4.5 with different rounding: roundBankers(expression [, decimal_places]) ``` -**Parameters** +**Arguments** - `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). - `decimal-places` — Decimal places. An integer number. diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 25f41211b47..c70ee20f076 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -16,7 +16,7 @@ Returns an array of selected substrings. Empty substrings may be selected if the splitByChar(, ) ``` -**Parameters** +**Arguments** - `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md). - `s` — The string to split. [String](../../sql-reference/data-types/string.md). @@ -53,7 +53,7 @@ Splits a string into substrings separated by a string. It uses a constant string splitByString(, ) ``` -**Parameters** +**Arguments** - `separator` — The separator. [String](../../sql-reference/data-types/string.md). - `s` — The string to split. [String](../../sql-reference/data-types/string.md). @@ -121,7 +121,7 @@ Extracts all groups from non-overlapping substrings matched by a regular express extractAllGroups(text, regexp) ``` -**Parameters** +**Arguments** - `text` — [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). - `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). 
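A minimal usage sketch for `extractAllGroups`, assuming an arbitrary `key=value` input string; each regular-expression match contributes one array with its captured groups:

``` sql
SELECT extractAllGroups('abc=111, def=222', '(\\w+)=(\\w+)');
```

This should return one array per match, e.g. `[['abc','111'],['def','222']]`.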
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 2b93dd924a3..191bd100dda 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -76,7 +76,7 @@ Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. All running i toValidUTF8( input_string ) ``` -Parameters: +**Arguments** - input_string — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. @@ -104,7 +104,7 @@ Repeats a string as many times as specified and concatenates the replicated valu repeat(s, n) ``` -**Parameters** +**Arguments** - `s` — The string to repeat. [String](../../sql-reference/data-types/string.md). - `n` — The number of times to repeat the string. [UInt](../../sql-reference/data-types/int-uint.md). @@ -173,7 +173,7 @@ Concatenates the strings listed in the arguments, without a separator. concat(s1, s2, ...) ``` -**Parameters** +**Arguments** Values of type String or FixedString. @@ -211,7 +211,7 @@ The function is named “injective” if it always returns different result for concatAssumeInjective(s1, s2, ...) ``` -**Parameters** +**Arguments** Values of type String or FixedString. @@ -328,7 +328,7 @@ By default removes all consecutive occurrences of common whitespace (ASCII chara trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) ``` -**Parameters** +**Arguments** - `trim_character` — specified characters for trim. [String](../../sql-reference/data-types/string.md). - `input_string` — string for trim. [String](../../sql-reference/data-types/string.md). @@ -367,7 +367,7 @@ trimLeft(input_string) Alias: `ltrim(input_string)`. -**Parameters** +**Arguments** - `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). @@ -405,7 +405,7 @@ trimRight(input_string) Alias: `rtrim(input_string)`. -**Parameters** +**Arguments** - `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). @@ -443,7 +443,7 @@ trimBoth(input_string) Alias: `trim(input_string)`. -**Parameters** +**Arguments** - `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). @@ -496,7 +496,7 @@ Replaces literals, sequences of literals and complex aliases with placeholders. normalizeQuery(x) ``` -**Parameters** +**Arguments** - `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). @@ -532,7 +532,7 @@ Returns identical 64bit hash values without the values of literals for similar q normalizedQueryHash(x) ``` -**Parameters** +**Arguments** - `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). @@ -570,7 +570,7 @@ The following five XML predefined entities will be replaced: `<`, `&`, `>`, `"`, encodeXMLComponent(x) ``` -**Parameters** +**Arguments** - `x` — The sequence of characters. [String](../../sql-reference/data-types/string.md). @@ -600,4 +600,46 @@ Hello, "world"! 'foo' ``` +## decodeXMLComponent {#decode-xml-component} + +Replaces XML predefined entities with characters. Predefined entities are `"` `&` `'` `>` `<` +This function also replaces numeric character references with Unicode characters. Both decimal (like `✓`) and hexadecimal (`✓`) forms are supported. + +**Syntax** + +``` sql +decodeXMLComponent(x) +``` + +**Parameters** + +- `x` — A sequence of characters. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- The sequence of characters after replacement. 
+ +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +``` sql +SELECT decodeXMLComponent(''foo''); +SELECT decodeXMLComponent('< Σ >'); +``` + +Result: + +``` text +'foo' +< Σ > +``` + +**See Also** + +- [List of XML and HTML character entity references](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references) + + [Original article](https://clickhouse.tech/docs/en/query_language/functions/string_functions/) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 4036974dd37..83b0edea438 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -14,8 +14,6 @@ The search is case-sensitive by default in all these functions. There are separa Returns the position (in bytes) of the found substring in the string, starting from 1. -Works under the assumption that the string contains a set of bytes representing a single-byte encoded text. If this assumption is not met and a character can’t be represented using a single byte, the function doesn’t throw an exception and returns some unexpected result. If character can be represented using two bytes, it will use two bytes and so on. - For a case-insensitive search, use the function [positionCaseInsensitive](#positioncaseinsensitive). **Syntax** @@ -26,7 +24,7 @@ position(haystack, needle[, start_pos]) Alias: `locate(haystack, needle[, start_pos])`. -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -97,7 +95,7 @@ Works under the assumption that the string contains a set of bytes representing positionCaseInsensitive(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -140,7 +138,7 @@ For a case-insensitive search, use the function [positionCaseInsensitiveUTF8](#p positionUTF8(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -213,7 +211,7 @@ Works under the assumption that the string contains a set of bytes representing positionCaseInsensitiveUTF8(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -258,7 +256,7 @@ The search is performed on sequences of bytes without respect to string encoding multiSearchAllPositions(haystack, [needle1, needle2, ..., needlen]) ``` -**Parameters** +**Arguments** - `haystack` — string, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). 
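A minimal usage sketch for `multiSearchAllPositions` (the search is case-sensitive and positions are byte-based; `0` means the needle was not found):

``` sql
SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']);
```

With the default case-sensitive behaviour this should return `[0, 13, 0]`.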
@@ -373,7 +371,7 @@ Matches all groups of the `haystack` string using the `pattern` regular expressi extractAllGroupsHorizontal(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). - `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). @@ -414,7 +412,7 @@ Matches all groups of the `haystack` string using the `pattern` regular expressi extractAllGroupsVertical(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md). - `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md). @@ -473,7 +471,7 @@ Case insensitive variant of [like](https://clickhouse.tech/docs/en/sql-reference ilike(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — Input string. [String](../../sql-reference/syntax.md#syntax-string-literal). - `pattern` — If `pattern` doesn't contain percent signs or underscores, then the `pattern` only represents the string itself. An underscore (`_`) in `pattern` stands for (matches) any single character. A percent sign (`%`) matches any sequence of zero or more characters. @@ -550,7 +548,7 @@ For a case-insensitive search, use [countSubstringsCaseInsensitive](../../sql-re countSubstrings(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -616,7 +614,7 @@ Returns the number of substring occurrences case-insensitive. countSubstringsCaseInsensitive(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -682,7 +680,7 @@ Returns the number of substring occurrences in `UTF-8` case-insensitive. SELECT countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos]) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). @@ -734,7 +732,7 @@ Returns the number of regular expression matches for a `pattern` in a `haystack` countMatches(haystack, pattern) ``` -**Parameters** +**Arguments** - `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). - `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md). 
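A minimal usage sketch for `countMatches`, assuming an arbitrary haystack; note that the greedy pattern `o+` consumes `oo` as a single occurrence:

``` sql
SELECT countMatches('foobar.com', 'o+');
```

This should return `2`: one match for `oo` and one for the `o` in `.com`.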
diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index dcbcd3e374b..1006b68b8ee 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -45,7 +45,7 @@ untuple(x) You can use the `EXCEPT` expression to skip columns as a result of the query. -**Parameters** +**Arguments** - `x` - A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md). diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index a46c36395b8..2b3a9d9103f 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -5,6 +5,68 @@ toc_title: Working with maps # Functions for maps {#functions-for-working-with-tuple-maps} +## map {#function-map} + +Arranges `key:value` pairs into [Map(key, value)](../../sql-reference/data-types/map.md) data type. + +**Syntax** + +``` sql +map(key1, value1[, key2, value2, ...]) +``` + +**Arguments** + +- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md) or [Integer](../../sql-reference/data-types/int-uint.md). +- `value` — The value part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) or [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- Data structure as `key:value` pairs. + +Type: [Map(key, value)](../../sql-reference/data-types/map.md). + +**Examples** + +Query: + +``` sql +SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); +``` + +Result: + +``` text +┌─map('key1', number, 'key2', multiply(number, 2))─┐ +│ {'key1':0,'key2':0} │ +│ {'key1':1,'key2':2} │ +│ {'key1':2,'key2':4} │ +└──────────────────────────────────────────────────┘ +``` + +Query: + +``` sql +CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a; +INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); +SELECT a['key2'] FROM table_map; +``` + +Result: + +``` text +┌─arrayElement(a, 'key2')─┐ +│ 0 │ +│ 2 │ +│ 4 │ +└─────────────────────────┘ +``` + +**See Also** + +- [Map(key, value)](../../sql-reference/data-types/map.md) data type + + ## mapAdd {#function-mapadd} Collect all the keys and sum corresponding values. @@ -15,7 +77,7 @@ Collect all the keys and sum corresponding values. mapAdd(Tuple(Array, Array), Tuple(Array, Array) [, ...]) ``` -**Parameters** +**Arguments** Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. @@ -49,7 +111,7 @@ Collect all the keys and subtract corresponding values. 
mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...]) ``` -**Parameters** +**Arguments** Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. @@ -87,7 +149,7 @@ Generates a map, where keys are a series of numbers, from minimum to maximum key The number of elements in `keys` and `values` must be the same for each row. -**Parameters** +**Arguments** - `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). - `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). @@ -112,4 +174,4 @@ Result: └──────────────────────────────┴───────────────────────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) +[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-map-functions/) diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 6237cd6a976..0ea2bf0f1a6 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -22,7 +22,7 @@ Converts an input value to the [Int](../../sql-reference/data-types/int-uint.md) - `toInt128(expr)` — Results in the `Int128` data type. - `toInt256(expr)` — Results in the `Int256` data type. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. @@ -88,7 +88,7 @@ Converts an input value to the [UInt](../../sql-reference/data-types/int-uint.md - `toUInt64(expr)` — Results in the `UInt64` data type. - `toUInt256(expr)` — Results in the `UInt256` data type. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. @@ -154,7 +154,7 @@ Converts an input string to a [Nullable(Decimal(P,S))](../../sql-reference/data- These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `NULL` value instead of an exception in the event of an input value parsing error. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. 
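A minimal usage sketch contrasting a successful parse with a parsing error (the input strings are arbitrary):

``` sql
SELECT
    toDecimal32OrNull('1.111', 3) AS parsed,
    toDecimal32OrNull('not a number', 3) AS failed;
```

`parsed` should be `1.111` with type `Nullable(Decimal(9, 3))`, while `failed` should be `NULL` instead of raising an exception.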
@@ -199,7 +199,7 @@ Converts an input value to the [Decimal(P,S)](../../sql-reference/data-types/dec These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `0` value instead of an exception in the event of an input value parsing error. -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions), returns a value in the [String](../../sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. - `S` — Scale, the number of decimal places in the resulting value. @@ -303,81 +303,48 @@ SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut └────────────┴───────┘ ``` -## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264} +## reinterpret(x, T) {#type_conversion_function-reinterpret} -## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264} +Performs byte reinterpretation of ‘x’ as ‘t’ data type. -## reinterpretAsFloat(32\|64) {#reinterpretasfloat3264} - -## reinterpretAsDate {#reinterpretasdate} - -## reinterpretAsDateTime {#reinterpretasdatetime} - -These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn’t long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch. - -## reinterpretAsString {#type_conversion_functions-reinterpretAsString} - -This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. - -## reinterpretAsFixedString {#reinterpretasfixedstring} - -This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long. - -## reinterpretAsUUID {#reinterpretasuuid} - -This function accepts 16 bytes string, and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored. - -**Syntax** +Following reinterpretations are allowed: +1. Any type that has fixed size and value of that type can be represented continuously into FixedString. +2. Any type that if value of that type can be represented continuously into String. Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. +3. 
FixedString, String, types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString, ``` sql -reinterpretAsUUID(fixed_string) +SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint, + reinterpret(toInt8(1), 'Float32') as int_to_float, + reinterpret('1', 'UInt32') as string_to_int; ``` -**Parameters** - -- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring). - -**Returned value** - -- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type). - -**Examples** - -String to UUID. - -Query: - -``` sql -SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f'))) -``` - -Result: - ``` text -┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐ -│ 08090a0b-0c0d-0e0f-0001-020304050607 │ -└───────────────────────────────────────────────────────────────────────┘ +┌─int_to_uint─┬─int_to_float─┬─string_to_int─┐ +│ 255 │ 1e-45 │ 49 │ +└─────────────┴──────────────┴───────────────┘ ``` -Going back and forth from String to UUID. +## reinterpretAsUInt(8\|16\|32\|64\|256) {#reinterpretAsUInt8163264256} -Query: +## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#reinterpretAsInt8163264128256} -``` sql -WITH - generateUUIDv4() AS uuid, - identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str, - reinterpretAsUUID(reverse(unhex(str))) AS uuid2 -SELECT uuid = uuid2; -``` +## reinterpretAsDecimal(32\|64\|128\|256) {#reinterpretAsDecimal3264128256} -Result: +## reinterpretAsFloat(32\|64) {#type_conversion_function-reinterpretAsFloat} -``` text -┌─equals(uuid, uuid2)─┐ -│ 1 │ -└─────────────────────┘ -``` +## reinterpretAsDate {#type_conversion_function-reinterpretAsDate} + +## reinterpretAsDateTime {#type_conversion_function-reinterpretAsDateTime} + +## reinterpretAsDateTime64 {#type_conversion_function-reinterpretAsDateTime64} + +## reinterpretAsString {#type_conversion_function-reinterpretAsString} + +## reinterpretAsFixedString {#type_conversion_function-reinterpretAsFixedString} + +## reinterpretAsUUID {#type_conversion_function-reinterpretAsUUID} + +These functions are aliases for `reinterpret` function. ## CAST(x, T) {#type_conversion_function-cast} @@ -438,7 +405,7 @@ bounds of type T. Example ``` sql -SELECT cast(-1, 'UInt8') as uint8; +SELECT cast(-1, 'UInt8') as uint8; ``` @@ -459,7 +426,7 @@ Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in c ## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null} -Converts ‘x’ to the ‘t’ data type. Always returns nullable type and returns NULL +Converts ‘x’ to the ‘t’ data type. Always returns nullable type and returns NULL if the casted value is not representable in the target type. Example: @@ -504,7 +471,7 @@ toIntervalQuarter(number) toIntervalYear(number) ``` -**Parameters** +**Arguments** - `number` — Duration of interval. Positive integer number. @@ -542,7 +509,7 @@ The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 112 parseDateTimeBestEffort(time_string [, time_zone]); ``` -**Parameters** +**Arguments** - `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md). - `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md). 
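A minimal usage sketch for `parseDateTimeBestEffort` (the timestamp string is arbitrary; when no time zone argument is passed, the server time zone is used for interpretation):

``` sql
SELECT parseDateTimeBestEffort('12/12/2020 12:12:57') AS parsed;
```

This should yield the `DateTime` value `2020-12-12 12:12:57`.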
@@ -654,7 +621,7 @@ This function is similar to [‘parseDateTimeBestEffort’](#parsedatetimebestef parseDateTimeBestEffortUS(time_string [, time_zone]); ``` -**Parameters** +**Arguments** - `time_string` — String containing a date and time to convert. [String](../../sql-reference/data-types/string.md). - `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](../../sql-reference/data-types/string.md). @@ -738,7 +705,7 @@ To convert data from the `LowCardinality` data type use the [CAST](#type_convers toLowCardinality(expr) ``` -**Parameters** +**Arguments** - `expr` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../sql-reference/data-types/index.md#data_types). @@ -778,7 +745,7 @@ Converts a `DateTime64` to a `Int64` value with fixed sub-second precision. Inpu toUnixTimestamp64Milli(value) ``` -**Parameters** +**Arguments** - `value` — DateTime64 value with any precision. @@ -830,7 +797,7 @@ Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and fromUnixTimestamp64Milli(value [, ti]) ``` -**Parameters** +**Arguments** - `value` — `Int64` value with any precision. - `timezone` — `String` (optional) timezone name of the result. @@ -854,15 +821,15 @@ SELECT fromUnixTimestamp64Milli(i64, 'UTC') ## formatRow {#formatrow} -Converts arbitrary expressions into a string via given format. +Converts arbitrary expressions into a string via given format. -**Syntax** +**Syntax** ``` sql formatRow(format, x, y, ...) ``` -**Parameters** +**Arguments** - `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). - `x`,`y`, ... — Expressions. @@ -897,13 +864,13 @@ Result: Converts arbitrary expressions into a string via given format. The function trims the last `\n` if any. -**Syntax** +**Syntax** ``` sql formatRowNoNewline(format, x, y, ...) ``` -**Parameters** +**Arguments** - `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated). - `x`,`y`, ... — Expressions. diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index 006542f494a..9e79ef2d0cb 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -25,7 +25,7 @@ Extracts the hostname from a URL. domain(url) ``` -**Parameters** +**Arguments** - `url` — URL. Type: [String](../../sql-reference/data-types/string.md). @@ -76,7 +76,7 @@ Extracts the the top-level domain from a URL. topLevelDomain(url) ``` -**Parameters** +**Arguments** - `url` — URL. Type: [String](../../sql-reference/data-types/string.md). @@ -133,10 +133,9 @@ For example: ### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom} -Same as `cutToFirstSignificantSubdomain` but accept custom TLD list name, useful if: +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain. Accepts custom [TLD list](https://en.wikipedia.org/wiki/List_of_Internet_top-level_domains) name. -- you need fresh TLD list, -- or you have custom. +Can be useful if you need fresh TLD list or you have custom. Configuration example: @@ -149,21 +148,150 @@ Configuration example: ``` -Example: +**Syntax** -- `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/', 'public_suffix_list') = 'yandex.com.tr'`. 
+``` sql +cutToFirstSignificantSubdomain(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +```sql +SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Result: + +```text +┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo.there-is-no-such-domain │ +└───────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} -Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name. +Returns the part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. Accepts custom TLD list name. + +Can be useful if you need fresh TLD list or you have custom. + +Configuration example: + +```xml + + + + public_suffix_list.dat + + +``` + +**Syntax** + +```sql +cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. + +Type: [String](../../sql-reference/data-types/string.md). + +**Example** + +Query: + +```sql +SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list'); +``` + +Result: + +```text +┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐ +│ www.foo │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom} -Same as `firstSignificantSubdomain` but accept custom TLD list name. +Returns the first significant subdomain. Accepts customs TLD list name. -### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} +Can be useful if you need fresh TLD list or you have custom. -Same as `cutToFirstSignificantSubdomainWithWWW` but accept custom TLD list name. +Configuration example: + +```xml + + + + public_suffix_list.dat + + +``` + +**Syntax** + +```sql +firstSignificantSubdomainCustom(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +- First significant subdomain. + +Type: [String](../../sql-reference/data-types/string.md). 
+ +**Example** + +Query: + +```sql +SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Result: + +```text +┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo │ +└──────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**See Also** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### port(URL\[, default_port = 0\]) {#port} @@ -242,7 +370,7 @@ Extracts network locality (`username:password@host:port`) from a URL. netloc(URL) ``` -**Parameters** +**Arguments** - `url` — URL. [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index f70532252c7..56530b5e83b 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -115,7 +115,7 @@ Finds the highest continent in the hierarchy for the region. regionToTopContinent(id[, geobase]); ``` -**Parameters** +**Arguments** - `id` — Region ID from the Yandex geobase. [UInt32](../../sql-reference/data-types/int-uint.md). - `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). Optional. diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 0ea4d4b3dc5..16aa266ebf9 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -20,10 +20,12 @@ The following actions are supported: - [ADD COLUMN](#alter_add-column) — Adds a new column to the table. - [DROP COLUMN](#alter_drop-column) — Deletes the column. +- [RENAME COLUMN](#alter_rename-column) — Renames the column. - [CLEAR COLUMN](#alter_clear-column) — Resets column values. - [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column. - [MODIFY COLUMN](#alter_modify-column) — Changes column’s type, default expression and TTL. - [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties. +- [RENAME COLUMN](#alter_rename-column) — Renames an existing column. These actions are described in detail below. @@ -78,6 +80,22 @@ Example: ALTER TABLE visits DROP COLUMN browser ``` +## RENAME COLUMN {#alter_rename-column} + +``` sql +RENAME COLUMN [IF EXISTS] name to new_name +``` + +Renames the column `name` to `new_name`. If the `IF EXISTS` clause is specified, the query won’t return an error if the column doesn’t exist. Since renaming does not involve the underlying data, the query is completed almost instantly. + +**NOTE**: Columns specified in the key expression of the table (either with `ORDER BY` or `PRIMARY KEY`) cannot be renamed. Trying to change these columns will produce `SQL Error [524]`. + +Example: + +``` sql +ALTER TABLE visits RENAME COLUMN webBrowser TO browser +``` + ## CLEAR COLUMN {#alter_clear-column} ``` sql @@ -166,6 +184,22 @@ ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL; - [REMOVE TTL](ttl.md). +## RENAME COLUMN {#alter_rename-column} + +Renames an existing column. 
+ +Syntax: + +```sql +ALTER TABLE table_name RENAME COLUMN column_name TO new_column_name; +``` + +**Example** + +```sql +ALTER TABLE table_with_ttl RENAME COLUMN column_ttl TO column_ttl_new; +``` + ## Limitations {#alter-query-limitations} The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot. diff --git a/docs/en/sql-reference/statements/alter/quota.md b/docs/en/sql-reference/statements/alter/quota.md index 2923fd04c4b..a43b5255598 100644 --- a/docs/en/sql-reference/statements/alter/quota.md +++ b/docs/en/sql-reference/statements/alter/quota.md @@ -12,9 +12,28 @@ Syntax: ``` sql ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name] [RENAME TO new_name] - [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] - [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR} - {MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] | + [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED] + [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year} + {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | NO LIMITS | TRACKING ONLY} [,...]] [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` +Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table. + +Parameters `queries`, `query_selects`, 'query_inserts', errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table. + +`ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md). 
+ +**Examples** + +Limit the maximum number of queries for the current user with 123 queries in 15 months constraint: + +``` sql +ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER; +``` + +For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters: + +``` sql +ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default; +``` diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md index 20537b98a46..71416abf588 100644 --- a/docs/en/sql-reference/statements/create/quota.md +++ b/docs/en/sql-reference/statements/create/quota.md @@ -11,19 +11,29 @@ Syntax: ``` sql CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] - [KEYED BY {'none' | 'user name' | 'ip address' | 'forwarded ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] - [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR} - {MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] | + [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED] + [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year} + {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | NO LIMITS | TRACKING ONLY} [,...]] [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` +Keys `user_name`, `ip_address`, `client_key`, `client_key, user_name` and `client_key, ip_address` correspond to the fields in the [system.quotas](../../../operations/system-tables/quotas.md) table. + +Parameters `queries`, `query_selects`, `query_inserts`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` correspond to the fields in the [system.quotas_usage](../../../operations/system-tables/quotas_usage.md) table. + `ON CLUSTER` clause allows creating quotas on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md). -## Example {#create-quota-example} +**Examples** Limit the maximum number of queries for the current user with 123 queries in 15 months constraint: ``` sql -CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER +CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER; +``` + +For the default user limit the maximum execution time with half a second in 30 minutes, and limit the maximum number of queries with 321 and the maximum number of errors with 10 in 5 quaters: + +``` sql +CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default; ``` diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index d5343cce7be..c1a52e3b864 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -12,7 +12,7 @@ Syntax: ``` sql CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] 
- [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH}] BY {'password'|'hash'}] + [IDENTIFIED [WITH {NO_PASSWORD|PLAINTEXT_PASSWORD|SHA256_PASSWORD|SHA256_HASH|DOUBLE_SHA1_PASSWORD|DOUBLE_SHA1_HASH|LDAP_SERVER}] BY {'password'|'hash'}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...]] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] @@ -30,6 +30,7 @@ There are multiple ways of user identification: - `IDENTIFIED WITH sha256_hash BY 'hash'` - `IDENTIFIED WITH double_sha1_password BY 'qwerty'` - `IDENTIFIED WITH double_sha1_hash BY 'hash'` +- `IDENTIFIED WITH ldap_server BY 'server'` ## User Host {#user-host} diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 4370735b8d9..8acd58f4338 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -41,7 +41,6 @@ SELECT a, b, c FROM (SELECT ...) CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ... ``` - Materialized views store data transformed by the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query. When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE` – the table engine for storing data. @@ -59,6 +58,197 @@ A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Note The execution of [ALTER](../../../sql-reference/statements/alter/index.md) queries on materialized views has limitations, so they might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view. +Note that materialized view is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged before the insertion into a view. + Views look the same as normal tables. For example, they are listed in the result of the `SHOW TABLES` query. There isn’t a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md). + +## Live View (Experimental) {#live-view} + +!!! important "Important" + This is an experimental feature that may change in backwards-incompatible ways in the future releases. + Enable usage of live views and `WATCH` query using `set allow_experimental_live_view = 1`. + + +```sql +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +``` + +Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. + +Live views are triggered by insert into the innermost table specified in the query. + +Live views work similarly to how a query in a distributed table works. 
But instead of combining partial results from different servers they combine partial result from current data with partial result from the new data. When a live view query includes a subquery then the cached partial result is only stored for the innermost subquery. + +!!! info "Limitations" + - [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table. + - Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md), [system table](../../../operations/system-tables/index.md), a [normal view](#normal), or a [materialized view](#materialized) will not trigger a live view. + - Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result or aggregations where the state of the aggregation must be preserved. + - Does not work with replicated or distributed tables where inserts are performed on different nodes. + - Can't be triggered by multiple tables. + + See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view that in some cases can be used as a workaround. + +You can watch for changes in the live view query result using the [WATCH](../../../sql-reference/statements/watch.md) query + +```sql +WATCH [db.]live_view +``` + +**Example:** + +```sql +CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x; +CREATE LIVE VIEW lv AS SELECT sum(x) FROM mt; +``` + +Watch a live view while doing a parallel insert into the source table. + +```sql +WATCH lv +``` + +```bash +┌─sum(x)─┬─_version─┐ +│ 1 │ 1 │ +└────────┴──────────┘ +┌─sum(x)─┬─_version─┐ +│ 2 │ 2 │ +└────────┴──────────┘ +┌─sum(x)─┬─_version─┐ +│ 6 │ 3 │ +└────────┴──────────┘ +... +``` + +```sql +INSERT INTO mt VALUES (1); +INSERT INTO mt VALUES (2); +INSERT INTO mt VALUES (3); +``` + +or add [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause to just get change events. + +```sql +WATCH [db.]live_view EVENTS +``` + +**Example:** + +```sql +WATCH lv EVENTS +``` + +```bash +┌─version─┐ +│ 1 │ +└─────────┘ +┌─version─┐ +│ 2 │ +└─────────┘ +┌─version─┐ +│ 3 │ +└─────────┘ +... +``` + +You can execute [SELECT](../../../sql-reference/statements/select/index.md) query on a live view in the same way as for any regular view or a table. If the query result is cached it will return the result immediately without running the stored query on the underlying tables. + +```sql +SELECT * FROM [db.]live_view WHERE ... +``` + +### Force Refresh {#live-view-alter-refresh} + +You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement. + +### With Timeout {#live-view-with-timeout} + +When a live view is create with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query that was watching the live view. + +```sql +CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ... +``` + +If the timeout value is not specified then the value specified by the `temporary_live_view_timeout` setting is used. 
+ +**Example:** + +```sql +CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x; +CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt; +``` + +### With Refresh {#live-view-with-refresh} + +When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger. + +```sql +CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ... +``` + +If the refresh value is not specified then the value specified by the `periodic_live_view_refresh` setting is used. + +**Example:** + +```sql +CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now(); +WATCH lv +``` + +```bash +┌───────────────now()─┬─_version─┐ +│ 2021-02-21 08:47:05 │ 1 │ +└─────────────────────┴──────────┘ +┌───────────────now()─┬─_version─┐ +│ 2021-02-21 08:47:10 │ 2 │ +└─────────────────────┴──────────┘ +┌───────────────now()─┬─_version─┐ +│ 2021-02-21 08:47:15 │ 3 │ +└─────────────────────┴──────────┘ +``` + +You can combine `WITH TIMEOUT` and `WITH REFRESH` clauses using an `AND` clause. + +```sql +CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ... +``` + +**Example:** + +```sql +CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now(); +``` + +After 15 sec the live view will be automatically dropped if there are no active `WATCH` queries. + +```sql +WATCH lv +``` + +``` +Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table default.lv doesn't exist.. +``` + +### Usage + +Most common uses of live view tables include: + +- Providing push notifications for query result changes to avoid polling. +- Caching results of most frequent queries to provide immediate query results. +- Watching for table changes and triggering a follow-up select queries. +- Watching metrics from system tables using periodic refresh. + +### Settings {#live-view-settings} + +You can use the following settings to control the behaviour of live views. + +- `allow_experimental_live_view` - enable live views. Default is `0`. +- `live_view_heartbeat_interval` - the heartbeat interval in seconds to indicate live query is alive. Default is `15` seconds. +- `max_live_view_insert_blocks_before_refresh` - maximum number of inserted blocks after which + mergeable blocks are dropped and query is re-executed. Default is `64` inserts. +- `temporary_live_view_timeout` - interval after which live view with timeout is deleted. Default is `5` seconds. +- `periodic_live_view_refresh` - interval after which periodically refreshed live view is forced to refresh. Default is `60` seconds. + +[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/create/view/) diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index 2928e50224d..c517a515ab7 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -13,7 +13,7 @@ Basic query format: INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -You can specify a list of columns to insert using the `(c1, c2, c3)`. 
You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). +You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). For example, consider the table: @@ -62,8 +62,6 @@ If a list of columns doesn't include all existing columns, the rest of the colum - The values calculated from the `DEFAULT` expressions specified in the table definition. - Zeros and empty strings, if `DEFAULT` expressions are not defined. -If [strict\_insert\_defaults=1](../../operations/settings/settings.md), columns that do not have `DEFAULT` defined must be listed in the query. - Data can be passed to the INSERT in any [format](../../interfaces/formats.md#formats) supported by ClickHouse. The format must be specified explicitly in the query: ``` sql diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 7c13772ffdf..e99ebef838c 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -278,5 +278,4 @@ Other ways to make settings see [here](../../../operations/settings/index.md). SELECT * FROM some_table SETTINGS optimize_read_in_order=1, cast_keep_nullable=1; ``` -[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) - +[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) diff --git a/docs/en/sql-reference/statements/watch.md b/docs/en/sql-reference/statements/watch.md new file mode 100644 index 00000000000..761bc8a041e --- /dev/null +++ b/docs/en/sql-reference/statements/watch.md @@ -0,0 +1,106 @@ +--- +toc_priority: 53 +toc_title: WATCH +--- + +# WATCH Statement (Experimental) {#watch} + +!!! important "Important" + This is an experimental feature that may change in backwards-incompatible ways in the future releases. + Enable live views and `WATCH` query using `set allow_experimental_live_view = 1`. + + +``` sql +WATCH [db.]live_view +[EVENTS] +[LIMIT n] +[FORMAT format] +``` + +The `WATCH` query performs continuous data retrieval from a [live view](./create/view.md#live-view) table. Unless the `LIMIT` clause is specified it provides an infinite stream of query results from a [live view](./create/view.md#live-view). + +```sql +WATCH [db.]live_view +``` + +The virtual `_version` column in the query result indicates the current result version. 
+ +**Example:** + +```sql +CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now(); +WATCH lv +``` + +```bash +┌───────────────now()─┬─_version─┐ +│ 2021-02-21 09:17:21 │ 1 │ +└─────────────────────┴──────────┘ +┌───────────────now()─┬─_version─┐ +│ 2021-02-21 09:17:26 │ 2 │ +└─────────────────────┴──────────┘ +┌───────────────now()─┬─_version─┐ +│ 2021-02-21 09:17:31 │ 3 │ +└─────────────────────┴──────────┘ +... +``` + +By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../sql-reference/statements/insert-into.md) it can be forwarded to a different table. + +```sql +INSERT INTO [db.]table WATCH [db.]live_view ... +``` + +## EVENTS Clause {#events-clause} + +The `EVENTS` clause can be used to obtain a short form of the `WATCH` query where instead of the query result you will just get the latest query result version. + +```sql +WATCH [db.]live_view EVENTS +``` + +**Example:** + +```sql +CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now(); +WATCH lv EVENTS +``` + +```bash +┌─version─┐ +│ 1 │ +└─────────┘ +┌─version─┐ +│ 2 │ +└─────────┘ +... +``` + +## LIMIT Clause {#limit-clause} + +The `LIMIT n` clause species the number of updates the `WATCH` query should wait for before terminating. By default there is no limit on the number of updates and therefore the query will not terminate. The value of `0` indicates that the `WATCH` query should not wait for any new query results and therefore will return immediately once query is evaluated. + +```sql +WATCH [db.]live_view LIMIT 1 +``` + +**Example:** + +```sql +CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now(); +WATCH lv EVENTS LIMIT 1 +``` + +```bash +┌─version─┐ +│ 1 │ +└─────────┘ +``` + +## FORMAT Clause {#format-clause} + +The `FORMAT` clause works the same way as for the [SELECT](../../sql-reference/statements/select/format.md#format-clause). + +!!! info "Note" + The [JSONEachRowWithProgress](../../../interfaces/formats/#jsoneachrowwithprogress) format should be used when watching [live view](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting. + diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md index 5bbd22dfe4e..be6ba2b8bc4 100644 --- a/docs/en/sql-reference/table-functions/generate.md +++ b/docs/en/sql-reference/table-functions/generate.md @@ -13,7 +13,7 @@ Supports all data types that can be stored in table except `LowCardinality` and generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]); ``` -**Parameters** +**Arguments** - `name` — Name of corresponding column. - `TypeName` — Type of corresponding column. diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md index eec4a1d0c46..14cd4369285 100644 --- a/docs/en/sql-reference/table-functions/mysql.md +++ b/docs/en/sql-reference/table-functions/mysql.md @@ -13,7 +13,7 @@ Allows `SELECT` and `INSERT` queries to be performed on data that is stored on a mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']) ``` -**Parameters** +**Arguments** - `host:port` — MySQL server address. 
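A hypothetical invocation sketch for the `mysql` table function; the host, database, table and credentials below are placeholders and must be replaced with real connection details:

``` sql
SELECT *
FROM mysql('mysql-host:3306', 'test_db', 'test_table', 'mysql_user', 'mysql_password')
LIMIT 10;
```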
diff --git a/docs/en/sql-reference/table-functions/view.md b/docs/en/sql-reference/table-functions/view.md
index 9997971af65..08096c2b019 100644
--- a/docs/en/sql-reference/table-functions/view.md
+++ b/docs/en/sql-reference/table-functions/view.md
@@ -13,7 +13,7 @@ Turns a subquery into a table. The function implements views (see [CREATE VIEW](
view(subquery)
```

-**Parameters**
+**Arguments**

- `subquery` — `SELECT` query.
diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md
new file mode 100644
index 00000000000..cbf03a44d46
--- /dev/null
+++ b/docs/en/sql-reference/window-functions/index.md
@@ -0,0 +1,60 @@
+---
+toc_priority: 62
+toc_title: Window Functions
+---
+
+# [experimental] Window Functions
+
+!!! warning "Warning"
+    This is an experimental feature that is currently in development and is not ready
+    for general use. It will change in unpredictable backwards-incompatible ways in
+    future releases. Set `allow_experimental_window_functions = 1` to enable it.
+
+ClickHouse supports the standard grammar for defining windows and window functions. The following features are currently supported:
+
+| Feature | Support or workaround |
+| --------| ----------|
+| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported |
+| expressions involving window functions, e.g. `(count(*) over ()) / 2` | not supported, wrap in a subquery ([feature request](https://github.com/ClickHouse/ClickHouse/issues/19857)) |
+| `WINDOW` clause (`select ... from table window w as (partition by id)`) | supported |
+| `ROWS` frame | supported |
+| `RANGE` frame | supported, the default |
+| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead |
+| `GROUPS` frame | not supported |
+| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported |
+| `rank()`, `dense_rank()`, `row_number()` | supported |
+| `lag/lead(value, offset)` | not supported, replace with `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead` |
+
+## References
+
+### GitHub Issues
+
+The roadmap for the initial support of window functions is [in this issue](https://github.com/ClickHouse/ClickHouse/issues/18097).
+
+All GitHub issues related to window functions have the [comp-window-functions](https://github.com/ClickHouse/ClickHouse/labels/comp-window-functions) tag.
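To make the feature table above concrete, here is a minimal, hedged sketch of the supported grammar. It assumes a hypothetical table `orders(id UInt64, time DateTime, value Float64)` and is illustrative only, not taken from the ClickHouse documentation:

```sql
SET allow_experimental_window_functions = 1;

SELECT
    id,
    -- ad hoc window specification with an explicit ROWS frame (running total per id)
    sum(value) OVER (PARTITION BY id ORDER BY time ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS running_total,
    -- workaround for lag(value, 1), as suggested in the table above
    any(value) OVER (PARTITION BY id ORDER BY time ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS prev_value,
    -- named window defined in the WINDOW clause
    row_number() OVER w AS rn
FROM orders
WINDOW w AS (PARTITION BY id ORDER BY time);
```

Since `RANGE` is the default frame, the `ROWS` frames are spelled out explicitly; an expression built on top of a window function, such as `(count(*) over ()) / 2`, would need a wrapping subquery, as noted in the table.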
+ +### Tests + +These tests contain the examples of the currently supported grammar: + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/performance/window_functions.xml + +https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/01591_window_functions.sql + +### Postgres Docs + +https://www.postgresql.org/docs/current/sql-select.html#SQL-WINDOW + +https://www.postgresql.org/docs/devel/sql-expressions.html#SYNTAX-WINDOW-FUNCTIONS + +https://www.postgresql.org/docs/devel/functions-window.html + +https://www.postgresql.org/docs/devel/tutorial-window.html + +### MySQL Docs + +https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-usage.html + +https://dev.mysql.com/doc/refman/8.0/en/window-functions-frames.html diff --git a/docs/en/whats-new/changelog/2020.md b/docs/en/whats-new/changelog/2020.md index 5975edd3c6c..bf4e4fb0fcc 100644 --- a/docs/en/whats-new/changelog/2020.md +++ b/docs/en/whats-new/changelog/2020.md @@ -5,6 +5,22 @@ toc_title: '2020' ### ClickHouse release 20.12 +### ClickHouse release v20.12.5.14-stable, 2020-12-28 + +#### Bug Fix + +* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)). +* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)). +* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)). +* Fix filling table `system.settings_profile_elements`. This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). [#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). +* Fix error when query `MODIFY COLUMN ... REMOVE TTL` doesn't actually remove column TTL. [#18130](https://github.com/ClickHouse/ClickHouse/pull/18130) ([alesapin](https://github.com/alesapin)). + +#### Build/Testing/Packaging Improvement + +* Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)). + + ### ClickHouse release v20.12.4.5-stable, 2020-12-24 #### Bug Fix @@ -142,6 +158,70 @@ toc_title: '2020' ## ClickHouse release 20.11 +### ClickHouse release v20.11.7.16-stable, 2021-03-02 + +#### Improvement + +* Explicitly set uid / gid of clickhouse user & group to the fixed values (101) in clickhouse-server images. [#19096](https://github.com/ClickHouse/ClickHouse/pull/19096) ([filimonov](https://github.com/filimonov)). + +#### Bug Fix + +* BloomFilter index crash fix. Fixes [#19757](https://github.com/ClickHouse/ClickHouse/issues/19757). 
[#19884](https://github.com/ClickHouse/ClickHouse/pull/19884) ([Maksim Kita](https://github.com/kitaisreal)). +* Deadlock was possible if system.text_log is enabled. This fixes [#19874](https://github.com/ClickHouse/ClickHouse/issues/19874). [#19875](https://github.com/ClickHouse/ClickHouse/pull/19875) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* In previous versions, unusual arguments for function arrayEnumerateUniq may cause crash or infinite loop. This closes [#19787](https://github.com/ClickHouse/ClickHouse/issues/19787). [#19788](https://github.com/ClickHouse/ClickHouse/pull/19788) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed stack overflow when using accurate comparison of arithmetic type with string type. [#19773](https://github.com/ClickHouse/ClickHouse/pull/19773) ([tavplubix](https://github.com/tavplubix)). +* Fix a segmentation fault in `bitmapAndnot` function. Fixes [#19668](https://github.com/ClickHouse/ClickHouse/issues/19668). [#19713](https://github.com/ClickHouse/ClickHouse/pull/19713) ([Maksim Kita](https://github.com/kitaisreal)). +* Some functions with big integers may cause segfault. Big integers is experimental feature. This closes [#19667](https://github.com/ClickHouse/ClickHouse/issues/19667). [#19672](https://github.com/ClickHouse/ClickHouse/pull/19672) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix wrong result of function `neighbor` for `LowCardinality` argument. Fixes [#10333](https://github.com/ClickHouse/ClickHouse/issues/10333). [#19617](https://github.com/ClickHouse/ClickHouse/pull/19617) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix use-after-free of the CompressedWriteBuffer in Connection after disconnect. [#19599](https://github.com/ClickHouse/ClickHouse/pull/19599) ([Azat Khuzhin](https://github.com/azat)). +* `DROP/DETACH TABLE table ON CLUSTER cluster SYNC` query might hang, it's fixed. Fixes [#19568](https://github.com/ClickHouse/ClickHouse/issues/19568). [#19572](https://github.com/ClickHouse/ClickHouse/pull/19572) ([tavplubix](https://github.com/tavplubix)). +* Query CREATE DICTIONARY id expression fix. [#19571](https://github.com/ClickHouse/ClickHouse/pull/19571) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix SIGSEGV with merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read=0/UINT64_MAX. [#19528](https://github.com/ClickHouse/ClickHouse/pull/19528) ([Azat Khuzhin](https://github.com/azat)). +* Buffer overflow (on memory read) was possible if `addMonth` function was called with specifically crafted arguments. This fixes [#19441](https://github.com/ClickHouse/ClickHouse/issues/19441). This fixes [#19413](https://github.com/ClickHouse/ClickHouse/issues/19413). [#19472](https://github.com/ClickHouse/ClickHouse/pull/19472) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Mark distributed batch as broken in case of empty data block in one of files. [#19449](https://github.com/ClickHouse/ClickHouse/pull/19449) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible buffer overflow in Uber H3 library. See https://github.com/uber/h3/issues/392. This closes [#19219](https://github.com/ClickHouse/ClickHouse/issues/19219). [#19383](https://github.com/ClickHouse/ClickHouse/pull/19383) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix system.parts _state column (LOGICAL_ERROR when querying this column, due to incorrect order). 
[#19346](https://github.com/ClickHouse/ClickHouse/pull/19346) ([Azat Khuzhin](https://github.com/azat)). +* Fix error `Cannot convert column now64() because it is constant but values of constants are different in source and result`. Continuation of [#7156](https://github.com/ClickHouse/ClickHouse/issues/7156). [#19316](https://github.com/ClickHouse/ClickHouse/pull/19316) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug when concurrent `ALTER` and `DROP` queries may hang while processing ReplicatedMergeTree table. [#19237](https://github.com/ClickHouse/ClickHouse/pull/19237) ([alesapin](https://github.com/alesapin)). +* Fix infinite reading from file in `ORC` format (was introduced in [#10580](https://github.com/ClickHouse/ClickHouse/issues/10580)). Fixes [#19095](https://github.com/ClickHouse/ClickHouse/issues/19095). [#19134](https://github.com/ClickHouse/ClickHouse/pull/19134) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix startup bug when clickhouse was not able to read compression codec from `LowCardinality(Nullable(...))` and throws exception `Attempt to read after EOF`. Fixes [#18340](https://github.com/ClickHouse/ClickHouse/issues/18340). [#19101](https://github.com/ClickHouse/ClickHouse/pull/19101) ([alesapin](https://github.com/alesapin)). +* Fixed `There is no checkpoint` error when inserting data through http interface using `Template` or `CustomSeparated` format. Fixes [#19021](https://github.com/ClickHouse/ClickHouse/issues/19021). [#19072](https://github.com/ClickHouse/ClickHouse/pull/19072) ([tavplubix](https://github.com/tavplubix)). +* Restrict `MODIFY TTL` queries for `MergeTree` tables created in old syntax. Previously the query succeeded, but actually it had no effect. [#19064](https://github.com/ClickHouse/ClickHouse/pull/19064) ([Anton Popov](https://github.com/CurtizJ)). +* Make sure `groupUniqArray` returns correct type for argument of Enum type. This closes [#17875](https://github.com/ClickHouse/ClickHouse/issues/17875). [#19019](https://github.com/ClickHouse/ClickHouse/pull/19019) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix possible error `Expected single dictionary argument for function` if use function `ignore` with `LowCardinality` argument. Fixes [#14275](https://github.com/ClickHouse/ClickHouse/issues/14275). [#19016](https://github.com/ClickHouse/ClickHouse/pull/19016) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix inserting of `LowCardinality` column to table with `TinyLog` engine. Fixes [#18629](https://github.com/ClickHouse/ClickHouse/issues/18629). [#19010](https://github.com/ClickHouse/ClickHouse/pull/19010) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Disable `optimize_move_functions_out_of_any` because optimization is not always correct. This closes [#18051](https://github.com/ClickHouse/ClickHouse/issues/18051). This closes [#18973](https://github.com/ClickHouse/ClickHouse/issues/18973). [#18981](https://github.com/ClickHouse/ClickHouse/pull/18981) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed very rare deadlock at shutdown. [#18977](https://github.com/ClickHouse/ClickHouse/pull/18977) ([tavplubix](https://github.com/tavplubix)). +* Fix bug when mutation with some escaped text (like `ALTER ... UPDATE e = CAST('foo', 'Enum8(\'foo\' = 1')` serialized incorrectly. Fixes [#18878](https://github.com/ClickHouse/ClickHouse/issues/18878). [#18944](https://github.com/ClickHouse/ClickHouse/pull/18944) ([alesapin](https://github.com/alesapin)). 
+* Attach partition should reset the mutation. [#18804](https://github.com/ClickHouse/ClickHouse/issues/18804). [#18935](https://github.com/ClickHouse/ClickHouse/pull/18935) ([fastio](https://github.com/fastio)). +* Fix possible hang at shutdown in clickhouse-local. This fixes [#18891](https://github.com/ClickHouse/ClickHouse/issues/18891). [#18893](https://github.com/ClickHouse/ClickHouse/pull/18893) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix *If combinator with unary function and Nullable types. [#18806](https://github.com/ClickHouse/ClickHouse/pull/18806) ([Azat Khuzhin](https://github.com/azat)). +* Asynchronous distributed INSERTs can be rejected by the server if the setting `network_compression_method` is globally set to non-default value. This fixes [#18741](https://github.com/ClickHouse/ClickHouse/issues/18741). [#18776](https://github.com/ClickHouse/ClickHouse/pull/18776) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fixed `Attempt to read after eof` error when trying to `CAST` `NULL` from `Nullable(String)` to `Nullable(Decimal(P, S))`. Now function `CAST` returns `NULL` when it cannot parse decimal from nullable string. Fixes [#7690](https://github.com/ClickHouse/ClickHouse/issues/7690). [#18718](https://github.com/ClickHouse/ClickHouse/pull/18718) ([Winter Zhang](https://github.com/zhang2014)). +* Fix Logger with unmatched arg size. [#18717](https://github.com/ClickHouse/ClickHouse/pull/18717) ([sundyli](https://github.com/sundy-li)). +* Add FixedString Data type support. I'll get this exception "Code: 50, e.displayText() = DB::Exception: Unsupported type FixedString(1)" when replicating data from MySQL to ClickHouse. This patch fixes bug [#18450](https://github.com/ClickHouse/ClickHouse/issues/18450) Also fixes [#6556](https://github.com/ClickHouse/ClickHouse/issues/6556). [#18553](https://github.com/ClickHouse/ClickHouse/pull/18553) ([awesomeleo](https://github.com/awesomeleo)). +* Fix possible `Pipeline stuck` error while using `ORDER BY` after subquery with `RIGHT` or `FULL` join. [#18550](https://github.com/ClickHouse/ClickHouse/pull/18550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug which may lead to `ALTER` queries hung after corresponding mutation kill. Found by thread fuzzer. [#18518](https://github.com/ClickHouse/ClickHouse/pull/18518) ([alesapin](https://github.com/alesapin)). +* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)). +* Disable constant folding for subqueries on the analysis stage, when the result cannot be calculated. [#18446](https://github.com/ClickHouse/ClickHouse/pull/18446) ([Azat Khuzhin](https://github.com/azat)). +* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)). +* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)). +* Fix filling table `system.settings_profile_elements`. 
This PR fixes [#18231](https://github.com/ClickHouse/ClickHouse/issues/18231). [#18379](https://github.com/ClickHouse/ClickHouse/pull/18379) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix index analysis of binary functions with constant argument which leads to wrong query results. This fixes [#18364](https://github.com/ClickHouse/ClickHouse/issues/18364). [#18373](https://github.com/ClickHouse/ClickHouse/pull/18373) ([Amos Bird](https://github.com/amosbird)). +* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). +* `SELECT count() FROM table` now can be executed if only one any column can be selected from the `table`. This PR fixes [#10639](https://github.com/ClickHouse/ClickHouse/issues/10639). [#18233](https://github.com/ClickHouse/ClickHouse/pull/18233) ([Vitaly Baranov](https://github.com/vitlibar)). +* `SELECT JOIN` now requires the `SELECT` privilege on each of the joined tables. This PR fixes [#17654](https://github.com/ClickHouse/ClickHouse/issues/17654). [#18232](https://github.com/ClickHouse/ClickHouse/pull/18232) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix possible incomplete query result while reading from `MergeTree*` in case of read backoff (message ` MergeTreeReadPool: Will lower number of threads` in logs). Was introduced in [#16423](https://github.com/ClickHouse/ClickHouse/issues/16423). Fixes [#18137](https://github.com/ClickHouse/ClickHouse/issues/18137). [#18216](https://github.com/ClickHouse/ClickHouse/pull/18216) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix error when query `MODIFY COLUMN ... REMOVE TTL` doesn't actually remove column TTL. [#18130](https://github.com/ClickHouse/ClickHouse/pull/18130) ([alesapin](https://github.com/alesapin)). +* Fix indeterministic functions with predicate optimizer. This fixes [#17244](https://github.com/ClickHouse/ClickHouse/issues/17244). [#17273](https://github.com/ClickHouse/ClickHouse/pull/17273) ([Winter Zhang](https://github.com/zhang2014)). +* Mutation might hang waiting for some non-existent part after `MOVE` or `REPLACE PARTITION` or, in rare cases, after `DETACH` or `DROP PARTITION`. It's fixed. [#15537](https://github.com/ClickHouse/ClickHouse/pull/15537) ([tavplubix](https://github.com/tavplubix)). + +#### Build/Testing/Packaging Improvement + +* Update timezones info to 2020e. [#18531](https://github.com/ClickHouse/ClickHouse/pull/18531) ([alesapin](https://github.com/alesapin)). + + + ### ClickHouse release v20.11.6.6-stable, 2020-12-24 #### Bug Fix @@ -588,6 +668,60 @@ toc_title: '2020' ## ClickHouse release 20.9 +### ClickHouse release v20.9.7.11-stable, 2020-12-07 + +#### Performance Improvement + +* Fix performance of reading from `Merge` tables over huge number of `MergeTree` tables. Fixes [#7748](https://github.com/ClickHouse/ClickHouse/issues/7748). [#16988](https://github.com/ClickHouse/ClickHouse/pull/16988) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix + +* Do not restore parts from WAL if `in_memory_parts_enable_wal` is disabled. [#17802](https://github.com/ClickHouse/ClickHouse/pull/17802) ([detailyang](https://github.com/detailyang)). +* Fixed segfault when there is not enough space when inserting into `Distributed` table. 
[#17737](https://github.com/ClickHouse/ClickHouse/pull/17737) ([tavplubix](https://github.com/tavplubix)). +* Fixed problem when ClickHouse fails to resume connection to MySQL servers. [#17681](https://github.com/ClickHouse/ClickHouse/pull/17681) ([Alexander Kazakov](https://github.com/Akazz)). +* Fixed `Function not implemented` error when executing `RENAME` query in `Atomic` database with ClickHouse running on Windows Subsystem for Linux. Fixes [#17661](https://github.com/ClickHouse/ClickHouse/issues/17661). [#17664](https://github.com/ClickHouse/ClickHouse/pull/17664) ([tavplubix](https://github.com/tavplubix)). +* When clickhouse-client is used in interactive mode with multiline queries, single line comment was erronously extended till the end of query. This fixes [#13654](https://github.com/ClickHouse/ClickHouse/issues/13654). [#17565](https://github.com/ClickHouse/ClickHouse/pull/17565) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix the issue when server can stop accepting connections in very rare cases. [#17542](https://github.com/ClickHouse/ClickHouse/pull/17542) ([alexey-milovidov](https://github.com/alexey-milovidov)). +* Fix alter query hang when the corresponding mutation was killed on the different replica. Fixes [#16953](https://github.com/ClickHouse/ClickHouse/issues/16953). [#17499](https://github.com/ClickHouse/ClickHouse/pull/17499) ([alesapin](https://github.com/alesapin)). +* Fix bug when mark cache size was underestimated by clickhouse. It may happen when there are a lot of tiny files with marks. [#17496](https://github.com/ClickHouse/ClickHouse/pull/17496) ([alesapin](https://github.com/alesapin)). +* Fix `ORDER BY` with enabled setting `optimize_redundant_functions_in_order_by`. [#17471](https://github.com/ClickHouse/ClickHouse/pull/17471) ([Anton Popov](https://github.com/CurtizJ)). +* Fix duplicates after `DISTINCT` which were possible because of incorrect optimization. Fixes [#17294](https://github.com/ClickHouse/ClickHouse/issues/17294). [#17296](https://github.com/ClickHouse/ClickHouse/pull/17296) ([li chengxiang](https://github.com/chengxianglibra)). [#17439](https://github.com/ClickHouse/ClickHouse/pull/17439) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix crash while reading from `JOIN` table with `LowCardinality` types. Fixes [#17228](https://github.com/ClickHouse/ClickHouse/issues/17228). [#17397](https://github.com/ClickHouse/ClickHouse/pull/17397) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix set index invalidation when there are const columns in the subquery. This fixes [#17246](https://github.com/ClickHouse/ClickHouse/issues/17246) . [#17249](https://github.com/ClickHouse/ClickHouse/pull/17249) ([Amos Bird](https://github.com/amosbird)). +* Fix ColumnConst comparison which leads to crash. This fixed [#17088](https://github.com/ClickHouse/ClickHouse/issues/17088) . [#17135](https://github.com/ClickHouse/ClickHouse/pull/17135) ([Amos Bird](https://github.com/amosbird)). +* Fixed crash on `CREATE TABLE ... AS some_table` query when `some_table` was created `AS table_function()` Fixes [#16944](https://github.com/ClickHouse/ClickHouse/issues/16944). [#17072](https://github.com/ClickHouse/ClickHouse/pull/17072) ([tavplubix](https://github.com/tavplubix)). +* Bug fix for funciton fuzzBits, related issue: [#16980](https://github.com/ClickHouse/ClickHouse/issues/16980). [#17051](https://github.com/ClickHouse/ClickHouse/pull/17051) ([hexiaoting](https://github.com/hexiaoting)). 
+* Avoid unnecessary network errors for remote queries which may be cancelled while execution, like queries with `LIMIT`. [#17006](https://github.com/ClickHouse/ClickHouse/pull/17006) ([Azat Khuzhin](https://github.com/azat)). +* TODO. [#16866](https://github.com/ClickHouse/ClickHouse/pull/16866) ([tavplubix](https://github.com/tavplubix)). +* Return number of affected rows for INSERT queries via MySQL protocol. Previously ClickHouse used to always return 0, it's fixed. Fixes [#16605](https://github.com/ClickHouse/ClickHouse/issues/16605). [#16715](https://github.com/ClickHouse/ClickHouse/pull/16715) ([Winter Zhang](https://github.com/zhang2014)). + +#### Build/Testing/Packaging Improvement + +* Update embedded timezone data to version 2020d (also update cctz to the latest master). [#17204](https://github.com/ClickHouse/ClickHouse/pull/17204) ([filimonov](https://github.com/filimonov)). + + +### ClickHouse release v20.9.6.14-stable, 2020-11-20 + +#### Improvement + +* Make it possible to connect to `clickhouse-server` secure endpoint which requires SNI. This is possible when `clickhouse-server` is hosted behind TLS proxy. [#16938](https://github.com/ClickHouse/ClickHouse/pull/16938) ([filimonov](https://github.com/filimonov)). +* Conditional aggregate functions (for example: `avgIf`, `sumIf`, `maxIf`) should return `NULL` when miss rows and use nullable arguments. [#13964](https://github.com/ClickHouse/ClickHouse/pull/13964) ([Winter Zhang](https://github.com/zhang2014)). + +#### Bug Fix + +* Fix bug when `ON CLUSTER` queries may hang forever for non-leader ReplicatedMergeTreeTables. [#17089](https://github.com/ClickHouse/ClickHouse/pull/17089) ([alesapin](https://github.com/alesapin)). +* Reresolve the IP of the `format_avro_schema_registry_url` in case of errors. [#16985](https://github.com/ClickHouse/ClickHouse/pull/16985) ([filimonov](https://github.com/filimonov)). +* Fix possible server crash after `ALTER TABLE ... MODIFY COLUMN ... NewType` when `SELECT` have `WHERE` expression on altering column and alter doesn't finished yet. [#16968](https://github.com/ClickHouse/ClickHouse/pull/16968) ([Amos Bird](https://github.com/amosbird)). +* Install script should always create subdirs in config folders. This is only relevant for Docker build with custom config. [#16936](https://github.com/ClickHouse/ClickHouse/pull/16936) ([filimonov](https://github.com/filimonov)). +* Fix possible error `Illegal type of argument` for queries with `ORDER BY`. Fixes [#16580](https://github.com/ClickHouse/ClickHouse/issues/16580). [#16928](https://github.com/ClickHouse/ClickHouse/pull/16928) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Abort multipart upload if no data was written to WriteBufferFromS3. [#16840](https://github.com/ClickHouse/ClickHouse/pull/16840) ([Pavel Kovalenko](https://github.com/Jokser)). +* Fix crash when using `any` without any arguments. This is for [#16803](https://github.com/ClickHouse/ClickHouse/issues/16803) . cc @azat. [#16826](https://github.com/ClickHouse/ClickHouse/pull/16826) ([Amos Bird](https://github.com/amosbird)). +* Fix `IN` operator over several columns and tuples with enabled `transform_null_in` setting. Fixes [#15310](https://github.com/ClickHouse/ClickHouse/issues/15310). [#16722](https://github.com/ClickHouse/ClickHouse/pull/16722) ([Anton Popov](https://github.com/CurtizJ)). +* This will fix optimize_read_in_order/optimize_aggregation_in_order with max_threads>0 and expression in ORDER BY. 
[#16637](https://github.com/ClickHouse/ClickHouse/pull/16637) ([Azat Khuzhin](https://github.com/azat)). +* fixes [#16574](https://github.com/ClickHouse/ClickHouse/issues/16574) fixes [#16231](https://github.com/ClickHouse/ClickHouse/issues/16231) fix remote query failure when using 'if' suffix aggregate function. [#16610](https://github.com/ClickHouse/ClickHouse/pull/16610) ([Winter Zhang](https://github.com/zhang2014)). +* Query is finished faster in case of exception. Cancel execution on remote replicas if exception happens. [#15578](https://github.com/ClickHouse/ClickHouse/pull/15578) ([Azat Khuzhin](https://github.com/azat)). + + ### ClickHouse release v20.9.5.5-stable, 2020-11-13 #### Bug Fix @@ -744,6 +878,23 @@ toc_title: '2020' ## ClickHouse release 20.8 +### ClickHouse release v20.8.12.2-lts, 2021-01-16 + +#### Bug Fix + +* Fix *If combinator with unary function and Nullable types. [#18806](https://github.com/ClickHouse/ClickHouse/pull/18806) ([Azat Khuzhin](https://github.com/azat)). +* Restrict merges from wide to compact parts. In case of vertical merge it led to broken result part. [#18381](https://github.com/ClickHouse/ClickHouse/pull/18381) ([Anton Popov](https://github.com/CurtizJ)). + + +### ClickHouse release v20.8.11.17-lts, 2020-12-25 + +#### Bug Fix + +* Disable write with AIO during merges because it can lead to extremely rare data corruption of primary key columns during merge. [#18481](https://github.com/ClickHouse/ClickHouse/pull/18481) ([alesapin](https://github.com/alesapin)). +* Fixed `value is too short` error when executing `toType(...)` functions (`toDate`, `toUInt32`, etc) with argument of type `Nullable(String)`. Now such functions return `NULL` on parsing errors instead of throwing exception. Fixes [#7673](https://github.com/ClickHouse/ClickHouse/issues/7673). [#18445](https://github.com/ClickHouse/ClickHouse/pull/18445) ([tavplubix](https://github.com/tavplubix)). +* Fix possible crashes in aggregate functions with combinator `Distinct`, while using two-level aggregation. Fixes [#17682](https://github.com/ClickHouse/ClickHouse/issues/17682). [#18365](https://github.com/ClickHouse/ClickHouse/pull/18365) ([Anton Popov](https://github.com/CurtizJ)). + + ### ClickHouse release v20.8.10.13-lts, 2020-12-24 #### Bug Fix diff --git a/docs/es/operations/backup.md b/docs/es/operations/backup.md index a6297070663..be33851574a 100644 --- a/docs/es/operations/backup.md +++ b/docs/es/operations/backup.md @@ -5,7 +5,7 @@ toc_title: Copia de seguridad de datos # Copia de seguridad de datos {#data-backup} -Mientras que la [replicación](../engines/table-engines/mergetree-family/replication.md) proporciona protección contra fallos de hardware, no protege de errores humanos: el borrado accidental de datos, elminar la tabla equivocada o una tabla en el clúster equivocado, y bugs de software que dan como resultado un procesado incorrecto de los datos o la corrupción de los datos. En muchos casos, errores como estos afectarán a todas las réplicas. ClickHouse dispone de salvaguardas para prevenir algunos tipos de errores — por ejemplo, por defecto [no se puede simplemente eliminar tablas con un motor similar a MergeTree que contenga más de 50 Gb de datos](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Sin embargo, estas salvaguardas no cubren todos los casos posibles y pueden eludirse. 
+Mientras que la [replicación](../engines/table-engines/mergetree-family/replication.md) proporciona protección contra fallos de hardware, no protege de errores humanos: el borrado accidental de datos, elminar la tabla equivocada o una tabla en el clúster equivocado, y bugs de software que dan como resultado un procesado incorrecto de los datos o la corrupción de los datos. En muchos casos, errores como estos afectarán a todas las réplicas. ClickHouse dispone de salvaguardas para prevenir algunos tipos de errores — por ejemplo, por defecto [no se puede simplemente eliminar tablas con un motor similar a MergeTree que contenga más de 50 Gb de datos](server-configuration-parameters/settings.md#max-table-size-to-drop). Sin embargo, estas salvaguardas no cubren todos los casos posibles y pueden eludirse. Para mitigar eficazmente los posibles errores humanos, debe preparar cuidadosamente una estrategia para realizar copias de seguridad y restaurar sus datos **previamente**. diff --git a/docs/fr/operations/backup.md b/docs/fr/operations/backup.md index 9a463372947..953a96a04eb 100644 --- a/docs/fr/operations/backup.md +++ b/docs/fr/operations/backup.md @@ -7,7 +7,7 @@ toc_title: "La Sauvegarde Des Donn\xE9es" # La Sauvegarde Des Données {#data-backup} -Alors [réplication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [vous ne pouvez pas simplement supprimer des tables avec un moteur de type MergeTree contenant plus de 50 Go de données](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Toutefois, ces garanties ne couvrent pas tous les cas possibles et peuvent être contournés. +Alors [réplication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [vous ne pouvez pas simplement supprimer des tables avec un moteur de type MergeTree contenant plus de 50 Go de données](server-configuration-parameters/settings.md#max-table-size-to-drop). Toutefois, ces garanties ne couvrent pas tous les cas possibles et peuvent être contournés. Afin d'atténuer efficacement les erreurs humaines possibles, vous devez préparer soigneusement une stratégie de sauvegarde et de restauration de vos données **préalablement**. 
diff --git a/docs/ja/operations/backup.md b/docs/ja/operations/backup.md index 994271371a4..b0cde00e23c 100644 --- a/docs/ja/operations/backup.md +++ b/docs/ja/operations/backup.md @@ -7,7 +7,7 @@ toc_title: "\u30C7\u30FC\u30BF\u30D0\u30C3\u30AF\u30A2" # データバックア {#data-backup} -ながら [複製](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [50Gbを超えるデータを含むMergeTreeのようなエンジンでは、テーブルを削除することはできません](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). しかし、これらの保障措置がカバーしないすべてのケースで回避. +ながら [複製](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [50Gbを超えるデータを含むMergeTreeのようなエンジンでは、テーブルを削除することはできません](server-configuration-parameters/settings.md#max-table-size-to-drop). しかし、これらの保障措置がカバーしないすべてのケースで回避. ヒューマンエラーを効果的に軽減するには、データのバックアップと復元のための戦略を慎重に準備する必要があります **事前に**. diff --git a/docs/ru/development/style.md b/docs/ru/development/style.md index 4d71dca46a7..1b211259bbb 100644 --- a/docs/ru/development/style.md +++ b/docs/ru/development/style.md @@ -714,6 +714,7 @@ auto s = std::string{"Hello"}; ### Пользовательская ошибка {#error-messages-user-error} Такая ошибка вызвана действиями пользователя (неверный синтаксис запроса) или конфигурацией внешних систем (кончилось место на диске). Предполагается, что пользователь может устранить её самостоятельно. Для этого в сообщении об ошибке должна содержаться следующая информация: + * что произошло. Это должно объясняться в пользовательских терминах (`Function pow() is not supported for data type UInt128`), а не загадочными конструкциями из кода (`runtime overload resolution failed in DB::BinaryOperationBuilder::Impl, UInt128, Int8>::kaboongleFastPath()`). * почему/где/когда -- любой контекст, который помогает отладить проблему. Представьте, как бы её отлаживали вы (программировать и пользоваться отладчиком нельзя). * что можно предпринять для устранения ошибки. Здесь можно перечислить типичные причины проблемы, настройки, влияющие на это поведение, и так далее. diff --git a/docs/ru/engines/database-engines/materialize-mysql.md b/docs/ru/engines/database-engines/materialize-mysql.md index f23ac0cddd6..3022542e294 100644 --- a/docs/ru/engines/database-engines/materialize-mysql.md +++ b/docs/ru/engines/database-engines/materialize-mysql.md @@ -93,6 +93,7 @@ DDL-запросы в MySQL конвертируются в соответств - Каскадные запросы `UPDATE/DELETE` не поддерживаются движком `MaterializeMySQL`. - Репликация может быть легко нарушена. - Прямые операции изменения данных в таблицах и базах данных `MaterializeMySQL` запрещены. +- На работу `MaterializeMySQL` влияет настройка [optimize_on_insert](../../operations/settings/settings.md#optimize-on-insert). 
Когда таблица на MySQL сервере меняется, происходит слияние данных в соответсвующей таблице в базе данных `MaterializeMySQL`. ## Примеры использования {#examples-of-use} @@ -156,4 +157,4 @@ SELECT * FROM mysql.test; └───┴─────┴──────┘ ``` -[Оригинальная статья](https://clickhouse.tech/docs/ru/database_engines/materialize-mysql/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/database-engines/materialize-mysql/) diff --git a/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md new file mode 100644 index 00000000000..9b68bcfc770 --- /dev/null +++ b/docs/ru/engines/table-engines/integrations/embedded-rocksdb.md @@ -0,0 +1,44 @@ +--- +toc_priority: 6 +toc_title: EmbeddedRocksDB +--- + +# Движок EmbeddedRocksDB {#EmbeddedRocksDB-engine} + +Этот движок позволяет интегрировать ClickHouse с [rocksdb](http://rocksdb.org/). + +## Создание таблицы {#table_engine-EmbeddedRocksDB-creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE = EmbeddedRocksDB +PRIMARY KEY(primary_key_name); +``` + +Обязательные параметры: + +- `primary_key_name` может быть любое имя столбца из списка столбцов. +- Указание первичного ключа `primary key` является обязательным. Он будет сериализован в двоичном формате как ключ `rocksdb`. +- Поддерживается только один столбец в первичном ключе. +- Столбцы, которые отличаются от первичного ключа, будут сериализованы в двоичном формате как значение `rockdb` в соответствующем порядке. +- Запросы с фильтрацией по ключу `equals` или `in` оптимизируются для поиска по нескольким ключам из `rocksdb`. + +Пример: + +``` sql +CREATE TABLE test +( + `key` String, + `v1` UInt32, + `v2` String, + `v3` Float32, +) +ENGINE = EmbeddedRocksDB +PRIMARY KEY key; +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/table_engines/embedded-rocksdb/) \ No newline at end of file diff --git a/docs/ru/engines/table-engines/integrations/index.md b/docs/ru/engines/table-engines/integrations/index.md index 02189cf9e55..db7e527442e 100644 --- a/docs/ru/engines/table-engines/integrations/index.md +++ b/docs/ru/engines/table-engines/integrations/index.md @@ -12,7 +12,10 @@ toc_priority: 30 - [ODBC](../../../engines/table-engines/integrations/odbc.md) - [JDBC](../../../engines/table-engines/integrations/jdbc.md) - [MySQL](../../../engines/table-engines/integrations/mysql.md) +- [MongoDB](../../../engines/table-engines/integrations/mongodb.md) - [HDFS](../../../engines/table-engines/integrations/hdfs.md) - [Kafka](../../../engines/table-engines/integrations/kafka.md) +- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md) +- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) [Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/integrations/) diff --git a/docs/ru/engines/table-engines/integrations/mongodb.md b/docs/ru/engines/table-engines/integrations/mongodb.md new file mode 100644 index 00000000000..0765b3909de --- /dev/null +++ b/docs/ru/engines/table-engines/integrations/mongodb.md @@ -0,0 +1,57 @@ +--- +toc_priority: 7 +toc_title: MongoDB +--- + +# MongoDB {#mongodb} + +Движок таблиц MongoDB позволяет читать данные из коллекций СУБД MongoDB. В таблицах допустимы только плоские (не вложенные) типы данных. Запись (`INSERT`-запросы) не поддерживается. 
+ +## Создание таблицы {#creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name +( + name1 [type1], + name2 [type2], + ... +) ENGINE = MongoDB(host:port, database, collection, user, password); +``` + +**Параметры движка** + +- `host:port` — адрес сервера MongoDB. + +- `database` — имя базы данных на удалённом сервере. + +- `collection` — имя коллекции на удалённом сервере. + +- `user` — пользователь MongoDB. + +- `password` — пароль пользователя. + +## Примеры использования {#usage-example} + +Таблица в ClickHouse для чтения данных из колекции MongoDB: + +``` text +CREATE TABLE mongo_table +( + key UInt64, + data String +) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse'); +``` + +Запрос к таблице: + +``` sql +SELECT COUNT() FROM mongo_table; +``` + +``` text +┌─count()─┐ +│ 4 │ +└─────────┘ +``` + +[Original article](https://clickhouse.tech/docs/ru/operations/table_engines/integrations/mongodb/) diff --git a/docs/ru/engines/table-engines/integrations/rabbitmq.md b/docs/ru/engines/table-engines/integrations/rabbitmq.md index dedb5842d68..f55163c1988 100644 --- a/docs/ru/engines/table-engines/integrations/rabbitmq.md +++ b/docs/ru/engines/table-engines/integrations/rabbitmq.md @@ -52,10 +52,26 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - `rabbitmq_max_block_size` - `rabbitmq_flush_interval_ms` -Требуемая конфигурация: +Настройки форматов данных также могут быть добавлены в списке RabbitMQ настроек. + +Example: + +``` sql + CREATE TABLE queue ( + key UInt64, + value UInt64, + date DateTime + ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', + rabbitmq_exchange_name = 'exchange1', + rabbitmq_format = 'JSONEachRow', + rabbitmq_num_consumers = 5, + date_time_input_format = 'best_effort'; +``` Конфигурация сервера RabbitMQ добавляется с помощью конфигурационного файла ClickHouse. +Требуемая конфигурация: + ``` xml root @@ -63,16 +79,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ``` -Example: +Дополнительная конфигурация: -``` sql - CREATE TABLE queue ( - key UInt64, - value UInt64 - ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', - rabbitmq_exchange_name = 'exchange1', - rabbitmq_format = 'JSONEachRow', - rabbitmq_num_consumers = 5; +``` xml + + clickhouse + ``` ## Описание {#description} @@ -98,6 +110,7 @@ Example: - `consistent_hash` - данные равномерно распределяются между всеми связанными таблицами, где имя точки обмена совпадает. Обратите внимание, что этот тип обмена должен быть включен с помощью плагина RabbitMQ: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`. Настройка `rabbitmq_queue_base` может быть использована в следующих случаях: + 1. чтобы восстановить чтение из ранее созданных очередей, если оно прекратилось по какой-либо причине, но очереди остались непустыми. Для восстановления чтения из одной конкретной очереди, нужно написать ее имя в `rabbitmq_queue_base` настройку и не указывать настройки `rabbitmq_num_consumers` и `rabbitmq_num_queues`. Чтобы восстановить чтение из всех очередей, которые были созданы для конкретной таблицы, необходимо совпадение следующих настроек: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. По умолчанию, если настройка `rabbitmq_queue_base` не указана, будут использованы уникальные для каждой таблицы имена очередей. 2. чтобы объявить одни и те же очереди для разных таблиц, что позволяет создавать несколько параллельных подписчиков на каждую из очередей. 
То есть обеспечивается лучшая производительность. В данном случае, для таких таблиц также необходимо совпадение настроек: `rabbitmq_num_consumers`, `rabbitmq_num_queues`. 3. чтобы повторно использовать созданные c `durable` настройкой очереди, так как они не удаляются автоматически (но могут быть удалены с помощью любого RabbitMQ CLI). diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index 9b2a5eafca3..6fc566b7c31 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -37,7 +37,10 @@ ORDER BY expr [PARTITION BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] -[TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...] +[TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ] + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ] [SETTINGS name=value, ...] ``` @@ -71,7 +74,7 @@ ORDER BY expr Выражение должно возвращать столбец `Date` или `DateTime`. Пример: `TTL date + INTERVAL 1 DAY`. - Тип правила `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` указывает действие, которое будет выполнено с частью, удаление строк (прореживание), перемещение (при выполнении условия для всех строк части) на определённый диск (`TO DISK 'xxx'`) или том (`TO VOLUME 'xxx'`). Поведение по умолчанию соответствует удалению строк (`DELETE`). В списке правил может быть указано только одно выражение с поведением `DELETE`. + Тип правила `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` указывает действие, которое будет выполнено с частью: удаление строк (прореживание), перемещение (при выполнении условия для всех строк части) на определённый диск (`TO DISK 'xxx'`) или том (`TO VOLUME 'xxx'`), или агрегирование данных в устаревших строках. Поведение по умолчанию соответствует удалению строк (`DELETE`). В списке правил может быть указано только одно выражение с поведением `DELETE`. Дополнительные сведения смотрите в разделе [TTL для столбцов и таблиц](#table_engine-mergetree-ttl) @@ -91,6 +94,7 @@ ORDER BY expr - `max_parts_in_total` — максимальное количество кусков во всех партициях. - `max_compress_block_size` — максимальный размер блоков несжатых данных перед сжатием для записи в таблицу. Вы также можете задать этот параметр в глобальных настройках (смотрите [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная. - `min_compress_block_size` — минимальный размер блоков несжатых данных, необходимых для сжатия при записи следующей засечки. Вы также можете задать этот параметр в глобальных настройках (смотрите [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size)). Настройка, которая задается при создании таблицы, имеет более высокий приоритет, чем глобальная. + - `max_partitions_to_read` — Ограничивает максимальное число партиций для чтения в одном запросе. Также возможно указать настройку [max_partitions_to_read](../../../operations/settings/merge-tree-settings.md#max-partitions-to-read) в глобальных настройках. **Пример задания секций** @@ -443,16 +447,28 @@ ALTER TABLE example_table Для таблицы можно задать одно выражение для устаревания данных, а также несколько выражений, по срабатывании которых данные переместятся на [некоторый диск или том](#table_engine-mergetree-multiple-volumes). 
Когда некоторые данные в таблице устаревают, ClickHouse удаляет все соответствующие строки. ``` sql -TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ... +TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ... + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ``` За каждым TTL выражением может следовать тип действия, которое выполняется после достижения времени, соответствующего результату TTL выражения: - `DELETE` - удалить данные (действие по умолчанию); - `TO DISK 'aaa'` - переместить данные на диск `aaa`; -- `TO VOLUME 'bbb'` - переместить данные на том `bbb`. +- `TO VOLUME 'bbb'` - переместить данные на том `bbb`; +- `GROUP BY` - агрегировать данные. -Примеры: +В секции `WHERE` можно задать условие удаления или агрегирования устаревших строк (для перемещения условие `WHERE` не применимо). + +Колонки, по которым агрегируются данные в `GROUP BY`, должны являться префиксом первичного ключа таблицы. + +Если колонка не является частью выражения `GROUP BY` и не задается напрямую в секции `SET`, в результирующих строках она будет содержать случайное значение, взятое из одной из сгруппированных строк (как будто к ней применяется агрегирующая функция `any`). + +**Примеры** + +Создание таблицы с TTL: ``` sql CREATE TABLE example_table @@ -468,13 +484,43 @@ TTL d + INTERVAL 1 MONTH [DELETE], d + INTERVAL 2 WEEK TO DISK 'bbb'; ``` -Изменение TTL +Изменение TTL: ``` sql ALTER TABLE example_table MODIFY TTL d + INTERVAL 1 DAY; ``` +Создание таблицы, в которой строки устаревают через месяц. Устаревшие строки удаляются, если дата выпадает на понедельник: + +``` sql +CREATE TABLE table_with_where +( + d DateTime, + a Int +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(d) +ORDER BY d +TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1; +``` + +Создание таблицы, где устаревшие строки агрегируются. В результирующих строках колонка `x` содержит максимальное значение по сгруппированным строкам, `y` — минимальное значение, а `d` — случайное значение из одной из сгуппированных строк. + +``` sql +CREATE TABLE table_for_aggregation +( + d DateTime, + k1 Int, + k2 Int, + x Int, + y Int +) +ENGINE = MergeTree +ORDER BY k1, k2 +TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); +``` + **Удаление данных** Данные с истекшим TTL удаляются, когда ClickHouse мёржит куски данных. @@ -666,4 +712,4 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' После выполнения фоновых слияний или мутаций старые куски не удаляются сразу, а через некоторое время (табличная настройка `old_parts_lifetime`). Также они не перемещаются на другие тома или диски, поэтому до момента удаления они продолжают учитываться при подсчёте занятого дискового пространства. 
-[Оригинальная статья](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/engines/table-engines/mergetree-family/mergetree/) diff --git a/docs/ru/operations/backup.md b/docs/ru/operations/backup.md index 0dcb6fd307d..165b54d9b62 100644 --- a/docs/ru/operations/backup.md +++ b/docs/ru/operations/backup.md @@ -5,7 +5,7 @@ toc_title: "\u0420\u0435\u0437\u0435\u0440\u0432\u043d\u043e\u0435\u0020\u043a\u # Резервное копирование данных {#rezervnoe-kopirovanie-dannykh} -[Репликация](../engines/table-engines/mergetree-family/replication.md) обеспечивает защиту от аппаратных сбоев, но не защищает от человеческих ошибок: случайного удаления данных, удаления не той таблицы, которую надо было, или таблицы на не том кластере, а также программных ошибок, которые приводят к неправильной обработке данных или их повреждению. Во многих случаях подобные ошибки влияют на все реплики. ClickHouse имеет встроенные средства защиты для предотвращения некоторых типов ошибок — например, по умолчанию [не получится удалить таблицы \*MergeTree, содержащие более 50 Гб данных, одной командой](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). Однако эти средства защиты не охватывают все возможные случаи и могут быть обойдены. +[Репликация](../engines/table-engines/mergetree-family/replication.md) обеспечивает защиту от аппаратных сбоев, но не защищает от человеческих ошибок: случайного удаления данных, удаления не той таблицы, которую надо было, или таблицы на не том кластере, а также программных ошибок, которые приводят к неправильной обработке данных или их повреждению. Во многих случаях подобные ошибки влияют на все реплики. ClickHouse имеет встроенные средства защиты для предотвращения некоторых типов ошибок — например, по умолчанию [не получится удалить таблицы \*MergeTree, содержащие более 50 Гб данных, одной командой](server-configuration-parameters/settings.md#max-table-size-to-drop). Однако эти средства защиты не охватывают все возможные случаи и могут быть обойдены. Для того чтобы эффективно уменьшить возможные человеческие ошибки, следует тщательно подготовить стратегию резервного копирования и восстановления данных **заранее**. diff --git a/docs/ru/operations/settings/merge-tree-settings.md b/docs/ru/operations/settings/merge-tree-settings.md index e78d4c98683..bfc0b0a2644 100644 --- a/docs/ru/operations/settings/merge-tree-settings.md +++ b/docs/ru/operations/settings/merge-tree-settings.md @@ -181,4 +181,16 @@ Eсли суммарное число активных кусков во все При старте ClickHouse читает все куски всех таблиц (читает файлы с метаданными кусков), чтобы построить в ОЗУ список всех кусков. В некоторых системах с большим количеством кусков этот процесс может занимать длительное время, и это время можно сократить, увеличив `max_part_loading_threads` (если при этом процессе есть недозагруженность CPU и диска). -{## [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/merge-tree-settings/) ##} +## max_partitions_to_read {#max-partitions-to-read} + +Ограничивает максимальное число партиций для чтения в одном запросе. + +Указанное при создании таблицы значение настройки может быть переназначено настройкой на уровне запроса. + +Возможные значения: + +- Любое положительное целое число. + +Значение по умолчанию: -1 (неограниченно). 
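As a brief, hypothetical sketch of the two levels mentioned above (the table name `events` and the values are made up for illustration), the limit set in the table's `SETTINGS` clause can be overridden per query:

```sql
-- Table-level limit, set when the table is created.
CREATE TABLE events
(
    d Date,
    x UInt64
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(d)
ORDER BY x
SETTINGS max_partitions_to_read = 10;

-- Query-level setting overriding the table-level value.
SELECT count() FROM events SETTINGS max_partitions_to_read = 2;
```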
+ +[Original article](https://clickhouse.tech/docs/ru/operations/settings/merge_tree_settings/) diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 73dc0b9d944..7322b6c9184 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -283,12 +283,10 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), ( ## input_format_tsv_empty_as_default {#settings-input-format-tsv-empty-as-default} -Если эта настройка включена, замените пустые поля ввода в TSV значениями по умолчанию. Для сложных выражений по умолчанию также должна быть включена настройка `input_format_defaults_for_omitted_fields`. +Если эта настройка включена, все пустые поля во входящем TSV заменяются значениями по умолчанию. Для сложных выражений по умолчанию также должна быть включена настройка `input_format_defaults_for_omitted_fields`. По умолчанию отключена. -Disabled by default. - ## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number} Включает или отключает парсинг значений перечислений как идентификаторов перечислений для входного формата TSV. @@ -406,21 +404,46 @@ INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; Возможные значения: -- `'best_effort'` — включает расширенный парсинг. +- `best_effort` — включает расширенный парсинг. -ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `'2018-06-08T01:02:03.000Z'`. +ClickHouse может парсить базовый формат `YYYY-MM-DD HH:MM:SS` и все форматы [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601). Например, `2018-06-08T01:02:03.000Z`. -- `'basic'` — используется базовый парсер. +- `basic` — используется базовый парсер. -ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `'2019-08-20 10:18:56'` или `2019-08-20`. +ClickHouse может парсить только базовый формат `YYYY-MM-DD HH:MM:SS` или `YYYY-MM-DD`. Например, `2019-08-20 10:18:56` или `2019-08-20`. -Значение по умолчанию: `'basic'`. +Значение по умолчанию: `basic`. См. также: - [Тип данных DateTime.](../../sql-reference/data-types/datetime.md) - [Функции для работы с датой и временем.](../../sql-reference/functions/date-time-functions.md) +## date_time_output_format {#settings-date_time_output_format} + +Позволяет выбрать разные выходные форматы текстового представления даты и времени. + +Возможные значения: + +- `simple` - простой выходной формат. + + Выходные дата и время Clickhouse в формате `YYYY-MM-DD hh:mm:ss`. Например, `2019-08-20 10:18:56`. Расчет выполняется в соответствии с часовым поясом типа данных (если он есть) или часовым поясом сервера. + +- `iso` - выходной формат ISO. + + Выходные дата и время Clickhouse в формате [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ`. Например, `2019-08-20T10:18:56Z`. Обратите внимание, что выходные данные отображаются в формате UTC (`Z` означает UTC). + +- `unix_timestamp` - выходной формат Unix. + + Выходные дата и время в формате [Unix](https://en.wikipedia.org/wiki/Unix_time). Например `1566285536`. + +Значение по умолчанию: `simple`. + +См. 
также: + +- [Тип данных DateTime](../../sql-reference/data-types/datetime.md) +- [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) + ## join_default_strictness {#settings-join_default_strictness} Устанавливает строгость по умолчанию для [JOIN](../../sql-reference/statements/select/join.md#select-join). @@ -683,7 +706,7 @@ ClickHouse использует этот параметр при чтении д Установка логирования запроса. -Запросы, переданные в ClickHouse с этой установкой, логируются согласно правилам конфигурационного параметра сервера [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log). +Запросы, переданные в ClickHouse с этой настройкой, логируются согласно правилам конфигурационного параметра сервера [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log). Пример: @@ -1496,7 +1519,7 @@ ClickHouse генерирует исключение - Тип: секунды - Значение по умолчанию: 60 секунд -Управляет скоростью обнуления ошибок в распределенных таблицах. Если реплика недоступна в течение некоторого времени, накапливает 5 ошибок, а distributed_replica_error_half_life установлена на 1 секунду, то реплика считается нормальной через 3 секунды после последней ошибки. +Управляет скоростью обнуления счетчика ошибок в распределенных таблицах. Предположим, реплика остается недоступна в течение какого-то времени, и за этот период накопилось 5 ошибок. Если настройка `distributed_replica_error_half_life` установлена в значение 1 секунда, то реплика снова будет считаться доступной через 3 секунды после последней ошибки. См. также: @@ -1648,7 +1671,7 @@ ClickHouse генерирует исключение - Тип: bool - Значение по умолчанию: True -Обеспечивает параллельный анализ форматов данных с сохранением порядка. Поддерживается только для форматов TSV, TKSV, CSV и JSONEachRow. +Включает режим, при котором входящие данные парсятся параллельно, но с сохранением исходного порядка следования. Поддерживается только для форматов TSV, TKSV, CSV и JSONEachRow. ## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing} @@ -1962,7 +1985,7 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; ## output_format_pretty_grid_charset {#output-format-pretty-grid-charset} -Позволяет изменить кодировку, которая используется для печати грид-границ. Доступны следующие кодировки: UTF-8, ASCII. +Позволяет изменить кодировку, которая используется для отрисовки таблицы при выводе результатов запросов. Доступны следующие кодировки: UTF-8, ASCII. **Пример** @@ -2448,4 +2471,70 @@ SELECT SUM(-1), MAX(0) FROM system.one WHERE 0; Значение по умолчанию: `16`. +## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability} + +Задает вероятность того, что ClickHouse начнет трассировку для выполненных запросов (если не указан [входящий контекст](https://www.w3.org/TR/trace-context/) трассировки). + +Возможные значения: + +- 0 — трассировка для выполненных запросов отключена (если не указан входящий контекст трассировки). +- Положительное число с плавающей точкой в диапазоне [0..1]. Например, при значении настройки, равной `0,5`, ClickHouse начнет трассировку в среднем для половины запросов. +- 1 — трассировка для всех выполненных запросов включена. + +Значение по умолчанию: `0`. 
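A one-line sketch (not part of the original page) of enabling sampled tracing as described above:

```sql
-- Start a trace for roughly 10% of queries that arrive without an incoming trace context.
SET opentelemetry_start_trace_probability = 0.1;
```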
+ +## optimize_on_insert {#optimize-on-insert} + +Включает или выключает преобразование данных перед добавлением в таблицу, как будто над добавляемым блоком предварительно было произведено слияние (в соответствии с движком таблицы). + +Возможные значения: + +- 0 — выключена +- 1 — включена. + +Значение по умолчанию: 1. + +**Пример** + +Сравните добавление данных при включенной и выключенной настройке: + +Запрос: + +```sql +SET optimize_on_insert = 1; + +CREATE TABLE test1 (`FirstTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY FirstTable; + +INSERT INTO test1 SELECT number % 2 FROM numbers(5); + +SELECT * FROM test1; + +SET optimize_on_insert = 0; + +CREATE TABLE test2 (`SecondTable` UInt32) ENGINE = ReplacingMergeTree ORDER BY SecondTable; + +INSERT INTO test2 SELECT number % 2 FROM numbers(5); + +SELECT * FROM test2; +``` + +Результат: + +``` text +┌─FirstTable─┐ +│ 0 │ +│ 1 │ +└────────────┘ + +┌─SecondTable─┐ +│ 0 │ +│ 0 │ +│ 0 │ +│ 1 │ +│ 1 │ +└─────────────┘ +``` + +Обратите внимание на то, что эта настройка влияет на поведение [материализованных представлений](../../sql-reference/statements/create/view.md#materialized) и БД [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md). + [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) diff --git a/docs/ru/operations/system-tables/distributed_ddl_queue.md b/docs/ru/operations/system-tables/distributed_ddl_queue.md new file mode 100644 index 00000000000..058ed06f639 --- /dev/null +++ b/docs/ru/operations/system-tables/distributed_ddl_queue.md @@ -0,0 +1,65 @@ +# system.distributed_ddl_queue {#system_tables-distributed_ddl_queue} + +Содержит информацию о [распределенных ddl запросах (секция ON CLUSTER)](../../sql-reference/distributed-ddl.md), которые были выполнены на кластере. + +Столбцы: + +- `entry` ([String](../../sql-reference/data-types/string.md)) — идентификатор запроса. +- `host_name` ([String](../../sql-reference/data-types/string.md)) — имя хоста. +- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP-адрес хоста. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт для соединения с сервером. +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — состояние запроса. +- `cluster` ([String](../../sql-reference/data-types/string.md)) — имя кластера. +- `query` ([String](../../sql-reference/data-types/string.md)) — выполненный запрос. +- `initiator` ([String](../../sql-reference/data-types/string.md)) — узел, выполнивший запрос. +- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала запроса. +- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время окончания запроса. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — продолжительность выполнения запроса (в миллисекундах). +- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — код исключения из [ZooKeeper](../../operations/tips.md#zookeeper). 
+ +**Пример** + +``` sql +SELECT * +FROM system.distributed_ddl_queue +WHERE cluster = 'test_cluster' +LIMIT 2 +FORMAT Vertical + +Query id: f544e72a-6641-43f1-836b-24baa1c9632a + +Row 1: +────── +entry: query-0000000000 +host_name: clickhouse01 +host_address: 172.23.0.11 +port: 9000 +status: Finished +cluster: test_cluster +query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster +initiator: clickhouse01:9000 +query_start_time: 2020-12-30 13:07:51 +query_finish_time: 2020-12-30 13:07:51 +query_duration_ms: 6 +exception_code: ZOK + +Row 2: +────── +entry: query-0000000000 +host_name: clickhouse02 +host_address: 172.23.0.12 +port: 9000 +status: Finished +cluster: test_cluster +query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster +initiator: clickhouse01:9000 +query_start_time: 2020-12-30 13:07:51 +query_finish_time: 2020-12-30 13:07:51 +query_duration_ms: 6 +exception_code: ZOK + +2 rows in set. Elapsed: 0.025 sec. +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/distributed_ddl_queuedistributed_ddl_queue.md) + \ No newline at end of file diff --git a/docs/ru/operations/system-tables/opentelemetry_span_log.md b/docs/ru/operations/system-tables/opentelemetry_span_log.md new file mode 100644 index 00000000000..96555064b0e --- /dev/null +++ b/docs/ru/operations/system-tables/opentelemetry_span_log.md @@ -0,0 +1,49 @@ +# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log} + +Содержит информацию о [trace spans](https://opentracing.io/docs/overview/spans/) для выполненных запросов. + +Столбцы: + +- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md) — идентификатор трассировки для выполненного запроса. + +- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор `trace span`. + +- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор родительского `trace span`. + +- `operation_name` ([String](../../sql-reference/data-types/string.md)) — имя операции. + +- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — время начала `trace span` (в микросекундах). + +- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — время окончания `trace span` (в микросекундах). + +- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — дата окончания `trace span`. + +- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — имена [атрибутов](https://opentelemetry.io/docs/go/instrumentation/#attributes) в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте [OpenTelemetry](https://opentelemetry.io/). + +- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — значения атрибутов в зависимости от `trace span`. Заполняются согласно рекомендациям в стандарте `OpenTelemetry`. 
+ +**Пример** + +Запрос: + +``` sql +SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical; +``` + +Результат: + +``` text +Row 1: +────── +trace_id: cdab0847-0d62-61d5-4d38-dd65b19a1914 +span_id: 701487461015578150 +parent_span_id: 2991972114672045096 +operation_name: DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl() +start_time_us: 1612374594529090 +finish_time_us: 1612374594529108 +finish_date: 2021-02-03 +attribute.names: [] +attribute.values: [] +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/opentelemetry_span_log) diff --git a/docs/ru/operations/system-tables/part_log.md b/docs/ru/operations/system-tables/part_log.md index 255ece76ee2..bba4fda6135 100644 --- a/docs/ru/operations/system-tables/part_log.md +++ b/docs/ru/operations/system-tables/part_log.md @@ -6,29 +6,62 @@ Столбцы: -- `event_type` (Enum) — тип события. Столбец может содержать одно из следующих значений: +- `query_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор запроса `INSERT`, создавшего этот кусок. +- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — тип события. Столбец может содержать одно из следующих значений: - `NEW_PART` — вставка нового куска. - `MERGE_PARTS` — слияние кусков. - `DOWNLOAD_PART` — загрузка с реплики. - `REMOVE_PART` — удаление или отсоединение из таблицы с помощью [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition). - `MUTATE_PART` — изменение куска. - `MOVE_PART` — перемещение куска между дисками. -- `event_date` (Date) — дата события. -- `event_time` (DateTime) — время события. -- `duration_ms` (UInt64) — длительность. -- `database` (String) — имя базы данных, в которой находится кусок. -- `table` (String) — имя таблицы, в которой находится кусок. -- `part_name` (String) — имя куска. -- `partition_id` (String) — идентификатор партиции, в которую был добавлен кусок. В столбце будет значение ‘all’, если таблица партициируется по выражению `tuple()`. -- `rows` (UInt64) — число строк в куске. -- `size_in_bytes` (UInt64) — размер куска данных в байтах. -- `merged_from` (Array(String)) — массив имён кусков, из которых образован текущий кусок в результате слияния (также столбец заполняется в случае скачивания уже смерженного куска). -- `bytes_uncompressed` (UInt64) — количество прочитанных разжатых байт. -- `read_rows` (UInt64) — сколько было прочитано строк при слиянии кусков. -- `read_bytes` (UInt64) — сколько было прочитано байт при слиянии кусков. -- `error` (UInt16) — код ошибки, возникшей при текущем событии. -- `exception` (String) — текст ошибки. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата события. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время события. +- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — длительность. +- `database` ([String](../../sql-reference/data-types/string.md)) — имя базы данных, в которой находится кусок. +- `table` ([String](../../sql-reference/data-types/string.md)) — имя таблицы, в которой находится кусок. +- `part_name` ([String](../../sql-reference/data-types/string.md)) — имя куска. +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — идентификатор партиции, в которую был добавлен кусок. В столбце будет значение `all`, если таблица партициируется по выражению `tuple()`. +- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — абсолютный путь к папке с файлами кусков данных. 
+- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — число строк в куске. +- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — размер куска данных в байтах. +- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — массив имён кусков, из которых образован текущий кусок в результате слияния (также столбец заполняется в случае скачивания уже смерженного куска). +- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — количество прочитанных не сжатых байт. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — сколько было прочитано строк при слиянии кусков. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — сколько было прочитано байт при слиянии кусков. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — максимальная разница между выделенной и освобождённой памятью в контексте потока. +- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — код ошибки, возникшей при текущем событии. +- `exception` ([String](../../sql-reference/data-types/string.md)) — текст ошибки. Системная таблица `system.part_log` будет создана после первой вставки данных в таблицу `MergeTree`. +**Пример** + +``` sql +SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical; +``` + +``` text +Row 1: +────── +query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31 +event_type: NewPart +event_date: 2021-02-02 +event_time: 2021-02-02 11:14:28 +duration_ms: 35 +database: default +table: log_mt_2 +part_name: all_1_1_0 +partition_id: all +path_on_disk: db/data/default/log_mt_2/all_1_1_0/ +rows: 115418 +size_in_bytes: 1074311 +merged_from: [] +bytes_uncompressed: 0 +read_rows: 0 +read_bytes: 0 +peak_memory_usage: 0 +error: 0 +exception: +``` + [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/part_log) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md index 97edd5773c8..f44e65831a9 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmax.md @@ -4,8 +4,63 @@ toc_priority: 106 # argMax {#agg-function-argmax} -Синтаксис: `argMax(arg, val)` +Вычисляет значение `arg` при максимальном значении `val`. Если есть несколько разных значений `arg` для максимальных значений `val`, возвращает первое попавшееся из таких значений. -Вычисляет значение arg при максимальном значении val. Если есть несколько разных значений arg для максимальных значений val, то выдаётся первое попавшееся из таких значений. +Если функции передан кортеж, то будет выведен кортеж с максимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). -[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) +**Синтаксис** + +``` sql +argMax(arg, val) +``` + +или + +``` sql +argMax(tuple(arg, val)) +``` + +**Параметры** + +- `arg` — аргумент. +- `val` — значение. + +**Возвращаемое значение** + +- Значение `arg`, соответствующее максимальному значению `val`. + +Тип: соответствует типу `arg`. + +Если передан кортеж: + +- Кортеж `(arg, val)` c максимальным значением `val` и соответствующим ему `arg`. + +Тип: [Tuple](../../../sql-reference/data-types/tuple.md). 
+ +**Пример** + +Исходная таблица: + +``` text +┌─user─────┬─salary─┐ +│ director │ 5000 │ +│ manager │ 3000 │ +│ worker │ 1000 │ +└──────────┴────────┘ +``` + +Запрос: + +``` sql +SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary; +``` + +Результат: + +``` text +┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐ +│ director │ ('director',5000) │ +└──────────────────────┴─────────────────────────────┘ +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmax/) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md index 58161cd226a..8c25b79f92a 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/argmin.md @@ -4,11 +4,42 @@ toc_priority: 105 # argMin {#agg-function-argmin} -Синтаксис: `argMin(arg, val)` +Вычисляет значение `arg` при минимальном значении `val`. Если есть несколько разных значений `arg` для минимальных значений `val`, возвращает первое попавшееся из таких значений. -Вычисляет значение arg при минимальном значении val. Если есть несколько разных значений arg для минимальных значений val, то выдаётся первое попавшееся из таких значений. +Если функции передан кортеж, то будет выведен кортеж с минимальным значением `val`. Удобно использовать для работы с [SimpleAggregateFunction](../../../sql-reference/data-types/simpleaggregatefunction.md). -**Пример:** +**Синтаксис** + +``` sql +argMin(arg, val) +``` + +или + +``` sql +argMin(tuple(arg, val)) +``` + +**Параметры** + +- `arg` — аргумент. +- `val` — значение. + +**Возвращаемое значение** + +- Значение `arg`, соответствующее минимальному значению `val`. + +Тип: соответствует типу `arg`. + +Если передан кортеж: + +- Кортеж `(arg, val)` c минимальным значением `val` и соответствующим ему `arg`. + +Тип: [Tuple](../../../sql-reference/data-types/tuple.md). + +**Пример** + +Исходная таблица: ``` text ┌─user─────┬─salary─┐ @@ -18,14 +49,18 @@ toc_priority: 105 └──────────┴────────┘ ``` +Запрос: + ``` sql -SELECT argMin(user, salary) FROM salary +SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary; ``` +Результат: + ``` text -┌─argMin(user, salary)─┐ -│ worker │ -└──────────────────────┘ +┌─argMin(user, salary)─┬─argMin(tuple(user, salary))─┐ +│ worker │ ('worker',1000) │ +└──────────────────────┴─────────────────────────────┘ ``` -[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmin/) +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/argmin/) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md new file mode 100644 index 00000000000..a4647ecfb34 --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -0,0 +1,72 @@ +--- +toc_priority: 310 +toc_title: mannWhitneyUTest +--- + +# mannWhitneyUTest {#mannwhitneyutest} + +Вычисляет U-критерий Манна — Уитни для выборок из двух генеральных совокупностей. + +**Синтаксис** + +``` sql +mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_index) +``` + +Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке. 
+Проверяется нулевая гипотеза, что генеральные совокупности стохастически равны. Наряду с двусторонней гипотезой могут быть проверены и односторонние. +Для применения U-критерия Манна — Уитни закон распределения генеральных совокупностей не обязан быть нормальным. + +**Параметры** + +- `alternative` — альтернативная гипотеза. (Необязательный параметр, по умолчанию: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). + - `'two-sided'`; + - `'greater'`; + - `'less'`. +- `continuity_correction` - если не 0, то при вычислении p-значения применяется коррекция непрерывности. (Необязательный параметр, по умолчанию: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md). +- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md). + + +**Возвращаемые значения** + +[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами: + +- вычисленное значение критерия Манна — Уитни. [Float64](../../../sql-reference/data-types/float.md). +- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md). + + +**Пример** + +Таблица: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 10 │ 0 │ +│ 11 │ 0 │ +│ 12 │ 0 │ +│ 1 │ 1 │ +│ 2 │ 1 │ +│ 3 │ 1 │ +└─────────────┴──────────────┘ +``` + +Запрос: + +``` sql +SELECT mannWhitneyUTest('greater')(sample_data, sample_index) FROM mww_ttest; +``` + +Результат: + +``` text +┌─mannWhitneyUTest('greater')(sample_data, sample_index)─┐ +│ (9,0.04042779918503192) │ +└────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [U-критерий Манна — Уитни](https://ru.wikipedia.org/wiki/U-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%9C%D0%B0%D0%BD%D0%BD%D0%B0_%E2%80%94_%D0%A3%D0%B8%D1%82%D0%BD%D0%B8) + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/mannwhitneyutest/) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md b/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md new file mode 100644 index 00000000000..77378de95d1 --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/studentttest.md @@ -0,0 +1,66 @@ +--- +toc_priority: 300 +toc_title: studentTTest +--- + +# studentTTest {#studentttest} + +Вычисляет t-критерий Стьюдента для выборок из двух генеральных совокупностей. + +**Синтаксис** + +``` sql +studentTTest(sample_data, sample_index) +``` + +Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке. +Проверяется нулевая гипотеза, что средние значения генеральных совокупностей совпадают. Для применения t-критерия Стьюдента распределение в генеральных совокупностях должно быть нормальным и дисперсии должны совпадать. + +**Параметры** + +- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Возвращаемые значения** + +[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами: + +- вычисленное значение критерия Стьюдента. 
[Float64](../../../sql-reference/data-types/float.md). +- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md). + + +**Пример** + +Таблица: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 20.3 │ 0 │ +│ 21.1 │ 0 │ +│ 21.9 │ 1 │ +│ 21.7 │ 0 │ +│ 19.9 │ 1 │ +│ 21.8 │ 1 │ +└─────────────┴──────────────┘ +``` + +Запрос: + +``` sql +SELECT studentTTest(sample_data, sample_index) FROM student_ttest; +``` + +Результат: + +``` text +┌─studentTTest(sample_data, sample_index)───┐ +│ (-0.21739130434783777,0.8385421208415731) │ +└───────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [t-критерий Стьюдента](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A1%D1%82%D1%8C%D1%8E%D0%B4%D0%B5%D0%BD%D1%82%D0%B0) +- [welchTTest](welchttest.md#welchttest) + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/studentttest/) diff --git a/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md b/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md new file mode 100644 index 00000000000..16c122d1b49 --- /dev/null +++ b/docs/ru/sql-reference/aggregate-functions/reference/welchttest.md @@ -0,0 +1,66 @@ +--- +toc_priority: 301 +toc_title: welchTTest +--- + +# welchTTest {#welchttest} + +Вычисляет t-критерий Уэлча для выборок из двух генеральных совокупностей. + +**Синтаксис** + +``` sql +welchTTest(sample_data, sample_index) +``` + +Значения выборок берутся из столбца `sample_data`. Если `sample_index` равно 0, то значение из этой строки принадлежит первой выборке. Во всех остальных случаях значение принадлежит второй выборке. +Проверяется нулевая гипотеза, что средние значения генеральных совокупностей совпадают. Для применения t-критерия Уэлча распределение в генеральных совокупностях должно быть нормальным. Дисперсии могут не совпадать. + +**Параметры** + +- `sample_data` — данные выборок. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — индексы выборок. [Integer](../../../sql-reference/data-types/int-uint.md). + +**Возвращаемые значения** + +[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами: + +- вычисленное значение критерия Уэлча. [Float64](../../../sql-reference/data-types/float.md). +- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md). 
+ + +**Пример** + +Таблица: + +``` text +┌─sample_data─┬─sample_index─┐ +│ 20.3 │ 0 │ +│ 22.1 │ 0 │ +│ 21.9 │ 0 │ +│ 18.9 │ 1 │ +│ 20.3 │ 1 │ +│ 19 │ 1 │ +└─────────────┴──────────────┘ +``` + +Запрос: + +``` sql +SELECT welchTTest(sample_data, sample_index) FROM welch_ttest; +``` + +Результат: + +``` text +┌─welchTTest(sample_data, sample_index)─────┐ +│ (2.7988719532211235,0.051807360348581945) │ +└───────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [t-критерий Уэлча](https://ru.wikipedia.org/wiki/T-%D0%BA%D1%80%D0%B8%D1%82%D0%B5%D1%80%D0%B8%D0%B9_%D0%A3%D1%8D%D0%BB%D1%87%D0%B0) +- [studentTTest](studentttest.md#studentttest) + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/aggregate-functions/reference/welchTTest/) diff --git a/docs/ru/sql-reference/data-types/array.md b/docs/ru/sql-reference/data-types/array.md index 906246b66ee..86a23ed041b 100644 --- a/docs/ru/sql-reference/data-types/array.md +++ b/docs/ru/sql-reference/data-types/array.md @@ -47,6 +47,8 @@ SELECT [1, 2] AS x, toTypeName(x) ## Особенности работы с типами данных {#osobennosti-raboty-s-tipami-dannykh} +Максимальный размер массива ограничен одним миллионом элементов. + При создании массива «на лету» ClickHouse автоматически определяет тип аргументов как наиболее узкий тип данных, в котором можно хранить все перечисленные аргументы. Если среди аргументов есть [NULL](../../sql-reference/data-types/array.md#null-literal) или аргумент типа [Nullable](nullable.md#data_type-nullable), то тип элементов массива — [Nullable](nullable.md). Если ClickHouse не смог подобрать тип данных, то он сгенерирует исключение. Это произойдёт, например, при попытке создать массив одновременно со строками и числами `SELECT array(1, 'a')`. diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 9894fa2802b..ffdf83e5bd0 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -27,7 +27,7 @@ DateTime([timezone]) Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`. -ClickHouse отображает значения типа `DateTime` в формате `YYYY-MM-DD hh:mm:ss`. Отображение можно поменять с помощью функции [formatDateTime](../../sql-reference/data-types/datetime.md#formatdatetime). +ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format). diff --git a/docs/ru/sql-reference/data-types/map.md b/docs/ru/sql-reference/data-types/map.md new file mode 100644 index 00000000000..6cb8ccf1143 --- /dev/null +++ b/docs/ru/sql-reference/data-types/map.md @@ -0,0 +1,69 @@ +--- +toc_priority: 65 +toc_title: Map(key, value) +--- + +# Map(key, value) {#data_type-map} + +Тип данных `Map(key, value)` хранит пары `ключ:значение`. 
+ +**Параметры** +- `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md). +- `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md). + +!!! warning "Предупреждение" + Сейчас использование типа данных `Map` является экспериментальной возможностью. Чтобы использовать этот тип данных, включите настройку `allow_experimental_map_type = 1`. + +Чтобы получить значение из колонки `a Map('key', 'value')`, используйте синтаксис `a['key']`. В настоящее время такая подстановка работает по алгоритму с линейной сложностью. + +**Примеры** + +Рассмотрим таблицу: + +``` sql +CREATE TABLE table_map (a Map(String, UInt64)) ENGINE=Memory; +INSERT INTO table_map VALUES ({'key1':1, 'key2':10}), ({'key1':2,'key2':20}), ({'key1':3,'key2':30}); +``` + +Выборка всех значений ключа `key2`: + +```sql +SELECT a['key2'] FROM table_map; +``` +Результат: + +```text +┌─arrayElement(a, 'key2')─┐ +│ 10 │ +│ 20 │ +│ 30 │ +└─────────────────────────┘ +``` + +Если для какого-то ключа `key` в колонке с типом `Map()` нет значения, запрос возвращает нули для числовых колонок, пустые строки или пустые массивы. + +```sql +INSERT INTO table_map VALUES ({'key3':100}), ({}); +SELECT a['key3'] FROM table_map; +``` + +Результат: + +```text +┌─arrayElement(a, 'key3')─┐ +│ 100 │ +│ 0 │ +└─────────────────────────┘ +┌─arrayElement(a, 'key3')─┐ +│ 0 │ +│ 0 │ +│ 0 │ +└─────────────────────────┘ +``` + +**См. также** + +- функция [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) +- функция [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) + +[Original article](https://clickhouse.tech/docs/ru/data-types/map/) diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index fc4a3ac7285..f6b8b670563 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -205,8 +205,8 @@ RANGE(MIN first MAX last) Особенности алгоритма: - Если не найден `id` или для найденного `id` не найден диапазон, то возвращается значение по умолчанию для словаря. -- Если есть перекрывающиеся диапазоны, то можно использовать любой подходящий. -- Если граница диапазона `NULL` или некорректная дата (1900-01-01, 2039-01-01), то диапазон считается открытым. Диапазон может быть открытым с обеих сторон. +- Если есть перекрывающиеся диапазоны, то возвращается значение из любого (случайного) подходящего диапазона. +- Если граница диапазона `NULL` или некорректная дата (1900-01-01), то диапазон считается открытым. Диапазон может быть открытым с обеих сторон. Пример конфигурации: diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index 015d14b9de5..80057e6f0e0 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -1135,11 +1135,225 @@ SELECT Функция `arrayFirstIndex` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. 
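
A short sketch of the call shape this note describes; the arrays and predicates are invented for the example.

``` sql
-- Illustrative sketch: the lambda argument of arrayFirstIndex is mandatory.
-- The function returns the 1-based position of the first matching element, or 0 if nothing matches.
SELECT
    arrayFirstIndex(x -> x > 2, [1, 2, 3, 4]) AS first_gt_two,  -- 3
    arrayFirstIndex(x -> x > 9, [1, 2, 3, 4]) AS no_match;      -- 0
```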
-## arraySum(\[func,\] arr1, …) {#array-sum} +## arrayMin {#array-min} -Возвращает сумму значений функции `func`. Если функция не указана - просто возвращает сумму элементов массива. +Возвращает значение минимального элемента в исходном массиве. -Функция `arraySum` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. +Если передана функция `func`, возвращается минимум из элементов массива, преобразованных этой функцией. + +Функция `arrayMin` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. + +**Синтаксис** + +```sql +arrayMin([func,] arr) +``` + +**Параметры** + +- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — массив. [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Минимальное значение функции (или минимальный элемент массива). + +Тип: если передана `func`, соответствует типу ее возвращаемого значения, иначе соответствует типу элементов массива. + +**Примеры** + +Запрос: + +```sql +SELECT arrayMin([1, 2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 1 │ +└─────┘ +``` + +Запрос: + +```sql +SELECT arrayMin(x -> (-x), [1, 2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ -4 │ +└─────┘ +``` + +## arrayMax {#array-max} + +Возвращает значение максимального элемента в исходном массиве. + +Если передана функция `func`, возвращается максимум из элементов массива, преобразованных этой функцией. + +Функция `arrayMax` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. + +**Синтаксис** + +```sql +arrayMax([func,] arr) +``` + +**Параметры** + +- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — массив. [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Максимальное значение функции (или максимальный элемент массива). + +Тип: если передана `func`, соответствует типу ее возвращаемого значения, иначе соответствует типу элементов массива. + +**Примеры** + +Запрос: + +```sql +SELECT arrayMax([1, 2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 4 │ +└─────┘ +``` + +Запрос: + +```sql +SELECT arrayMax(x -> (-x), [1, 2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ -1 │ +└─────┘ +``` + +## arraySum {#array-sum} + +Возвращает сумму элементов в исходном массиве. + +Если передана функция `func`, возвращается сумма элементов массива, преобразованных этой функцией. + +Функция `arraySum` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. + +**Синтаксис** + +```sql +arraySum([func,] arr) +``` + +**Параметры** + +- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — массив. [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Сумма значений функции (или сумма элементов массива). 
+ +Тип: для Decimal чисел в исходном массиве (если функция `func` была передана, то для чисел, преобразованных ею) — [Decimal128](../../sql-reference/data-types/decimal.md), для чисел с плавающей точкой — [Float64](../../sql-reference/data-types/float.md), для беззнаковых целых чисел — [UInt64](../../sql-reference/data-types/int-uint.md), для целых чисел со знаком — [Int64](../../sql-reference/data-types/int-uint.md). + +**Примеры** + +Запрос: + +```sql +SELECT arraySum([2, 3]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 5 │ +└─────┘ +``` + +Запрос: + +```sql +SELECT arraySum(x -> x*x, [2, 3]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 13 │ +└─────┘ +``` + +## arrayAvg {#array-avg} + +Возвращает среднее значение элементов в исходном массиве. + +Если передана функция `func`, возвращается среднее значение элементов массива, преобразованных этой функцией. + +Функция `arrayAvg` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. + +**Синтаксис** + +```sql +arrayAvg([func,] arr) +``` + +**Параметры** + +- `func` — функция. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — массив. [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Среднее значение функции (или среднее значение элементов массива). + +Тип: [Float64](../../sql-reference/data-types/float.md). + +**Примеры** + +Запрос: + +```sql +SELECT arrayAvg([1, 2, 4]) AS res; +``` + +Результат: + +```text +┌────────────────res─┐ +│ 2.3333333333333335 │ +└────────────────────┘ +``` + +Запрос: + +```sql +SELECT arrayAvg(x -> (x * x), [2, 4]) AS res; +``` + +Результат: + +```text +┌─res─┐ +│ 10 │ +└─────┘ +``` ## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} diff --git a/docs/ru/sql-reference/functions/ip-address-functions.md b/docs/ru/sql-reference/functions/ip-address-functions.md index 724fb97c0d5..52f0a92bc9f 100644 --- a/docs/ru/sql-reference/functions/ip-address-functions.md +++ b/docs/ru/sql-reference/functions/ip-address-functions.md @@ -243,4 +243,81 @@ SELECT └───────────────────────────────────┴──────────────────────────────────┘ ``` +## isIPv4String {#isipv4string} + +Определяет, является ли строка адресом IPv4 или нет. Также вернет `0`, если `string` — адрес IPv6. + +**Синтаксис** + +```sql +isIPv4String(string) +``` + +**Параметры** + +- `string` — IP адрес. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- `1` если `string` является адресом IPv4 , иначе — `0`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). + +**Примеры** + +Запрос: + +```sql +SELECT addr, isIPv4String(addr) FROM ( SELECT ['0.0.0.0', '127.0.0.1', '::ffff:127.0.0.1'] AS addr ) ARRAY JOIN addr +``` + +Результат: + +``` text +┌─addr─────────────┬─isIPv4String(addr)─┐ +│ 0.0.0.0 │ 1 │ +│ 127.0.0.1 │ 1 │ +│ ::ffff:127.0.0.1 │ 0 │ +└──────────────────┴────────────────────┘ +``` + +## isIPv6String {#isipv6string} + +Определяет, является ли строка адресом IPv6 или нет. Также вернет `0`, если `string` — адрес IPv4. + +**Синтаксис** + +```sql +isIPv6String(string) +``` + +**Параметры** + +- `string` — IP адрес. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- `1` если `string` является адресом IPv6 , иначе — `0`. + +Тип: [UInt8](../../sql-reference/data-types/int-uint.md). 
+ +**Примеры** + +Запрос: + +``` sql +SELECT addr, isIPv6String(addr) FROM ( SELECT ['::', '1111::ffff', '::ffff:127.0.0.1', '127.0.0.1'] AS addr ) ARRAY JOIN addr +``` + +Результат: + +``` text +┌─addr─────────────┬─isIPv6String(addr)─┐ +│ :: │ 1 │ +│ 1111::ffff │ 1 │ +│ ::ffff:127.0.0.1 │ 1 │ +│ 127.0.0.1 │ 0 │ +└──────────────────┴────────────────────┘ +``` + [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/ip_address_functions/) diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index 68afb3e24ce..a738ba755b1 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -183,6 +183,103 @@ SELECT visibleWidth(NULL) Получить размер блока. В ClickHouse выполнение запроса всегда идёт по блокам (наборам кусочков столбцов). Функция позволяет получить размер блока, для которого её вызвали. +## byteSize {#function-bytesize} + +Возвращает оценку в байтах размера аргументов в памяти в несжатом виде. + +**Синтаксис** + +```sql +byteSize(argument [, ...]) +``` + +**Параметры** + +- `argument` — значение. + +**Возвращаемое значение** + +- Оценка размера аргументов в памяти в байтах. + +Тип: [UInt64](../../sql-reference/data-types/int-uint.md). + +**Примеры** + +Для аргументов типа [String](../../sql-reference/data-types/string.md) функция возвращает длину строки + 9 (нуль-терминатор + длина) + +Запрос: + +```sql +SELECT byteSize('string'); +``` + +Результат: + +```text +┌─byteSize('string')─┐ +│ 15 │ +└────────────────────┘ +``` + +Запрос: + +```sql +CREATE TABLE test +( + `key` Int32, + `u8` UInt8, + `u16` UInt16, + `u32` UInt32, + `u64` UInt64, + `i8` Int8, + `i16` Int16, + `i32` Int32, + `i64` Int64, + `f32` Float32, + `f64` Float64 +) +ENGINE = MergeTree +ORDER BY key; + +INSERT INTO test VALUES(1, 8, 16, 32, 64, -8, -16, -32, -64, 32.32, 64.64); + +SELECT key, byteSize(u8) AS `byteSize(UInt8)`, byteSize(u16) AS `byteSize(UInt16)`, byteSize(u32) AS `byteSize(UInt32)`, byteSize(u64) AS `byteSize(UInt64)`, byteSize(i8) AS `byteSize(Int8)`, byteSize(i16) AS `byteSize(Int16)`, byteSize(i32) AS `byteSize(Int32)`, byteSize(i64) AS `byteSize(Int64)`, byteSize(f32) AS `byteSize(Float32)`, byteSize(f64) AS `byteSize(Float64)` FROM test ORDER BY key ASC FORMAT Vertical; +``` + +Result: + +``` text +Row 1: +────── +key: 1 +byteSize(UInt8): 1 +byteSize(UInt16): 2 +byteSize(UInt32): 4 +byteSize(UInt64): 8 +byteSize(Int8): 1 +byteSize(Int16): 2 +byteSize(Int32): 4 +byteSize(Int64): 8 +byteSize(Float32): 4 +byteSize(Float64): 8 +``` + +Если функция принимает несколько аргументов, то она возвращает их совокупный размер в байтах. + +Запрос: + +```sql +SELECT byteSize(NULL, 1, 0.3, ''); +``` + +Результат: + +```text +┌─byteSize(NULL, 1, 0.3, '')─┐ +│ 19 │ +└────────────────────────────┘ +``` + ## materialize(x) {#materializex} Превращает константу в полноценный столбец, содержащий только одно значение. diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index aeb0652cc18..236583c211a 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -597,4 +597,46 @@ Hello, "world"! 'foo' ``` + +## decodeXMLComponent {#decode-xml-component} + +Заменяет символами предопределенные мнемоники XML: `"` `&` `'` `>` `<` +Также эта функция заменяет числовые ссылки соответствующими символами юникод. 
Поддерживаются десятичная (например, `✓`) и шестнадцатеричная (`✓`) формы. + +**Синтаксис** + +``` sql +decodeXMLComponent(x) +``` + +**Параметры** + +- `x` — последовательность символов. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Строка с произведенными заменами. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +``` sql +SELECT decodeXMLComponent(''foo''); +SELECT decodeXMLComponent('< Σ >'); +``` + +Результат: + +``` text +'foo' +< Σ > +``` + +**Смотрите также** + +- [Мнемоники в HTML](https://ru.wikipedia.org/wiki/%D0%9C%D0%BD%D0%B5%D0%BC%D0%BE%D0%BD%D0%B8%D0%BA%D0%B8_%D0%B2_HTML) + [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/string_functions/) diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index e8cbb8deec4..b7193da6f33 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -13,8 +13,6 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043f\u043e\u0438\u Возвращает позицию (в байтах) найденной подстроки в строке, начиная с 1, или 0, если подстрока не найдена. -Работает при допущении, что строка содержит набор байт, представляющий текст в однобайтовой кодировке. Если допущение не выполнено — то возвращает неопределенный результат (не кидает исключение). Если символ может быть представлен с помощью двух байтов, он будет представлен двумя байтами и так далее. - Для поиска без учета регистра используйте функцию [positionCaseInsensitive](#positioncaseinsensitive). **Синтаксис** diff --git a/docs/ru/sql-reference/functions/tuple-map-functions.md b/docs/ru/sql-reference/functions/tuple-map-functions.md index a2b25e68fe5..a36613280a1 100644 --- a/docs/ru/sql-reference/functions/tuple-map-functions.md +++ b/docs/ru/sql-reference/functions/tuple-map-functions.md @@ -5,6 +5,66 @@ toc_title: Работа с контейнерами map # Функции для работы с контейнерами map {#functions-for-working-with-tuple-maps} +## map {#function-map} + +Преобразовывает пары `ключ:значение` в тип данных [Map(key, value)](../../sql-reference/data-types/map.md). + +**Синтаксис** + +``` sql +map(key1, value1[, key2, value2, ...]) +``` + +**Параметры** + +- `key` — ключ. [String](../../sql-reference/data-types/string.md) или [Integer](../../sql-reference/data-types/int-uint.md). +- `value` — значение. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md) или [Array](../../sql-reference/data-types/array.md). + +**Возвращаемое значение** + +- Структура данных в виде пар `ключ:значение`. + +Тип: [Map(key, value)](../../sql-reference/data-types/map.md). + +**Примеры** + +Запрос: + +``` sql +SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); +``` + +Результат: + +``` text +┌─map('key1', number, 'key2', multiply(number, 2))─┐ +│ {'key1':0,'key2':0} │ +│ {'key1':1,'key2':2} │ +│ {'key1':2,'key2':4} │ +└──────────────────────────────────────────────────┘ +``` + +Запрос: + +``` sql +CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = MergeTree() ORDER BY a; +INSERT INTO table_map SELECT map('key1', number, 'key2', number * 2) FROM numbers(3); +SELECT a['key2'] FROM table_map; +``` + +Результат: + +``` text +┌─arrayElement(a, 'key2')─┐ +│ 0 │ +│ 2 │ +│ 4 │ +└─────────────────────────┘ +``` + +**См. 
также** + +- тип данных [Map(key, value)](../../sql-reference/data-types/map.md) ## mapAdd {#function-mapadd} Собирает все ключи и суммирует соответствующие значения. diff --git a/docs/ru/sql-reference/functions/url-functions.md b/docs/ru/sql-reference/functions/url-functions.md index 1008e2a359c..7541e16bed4 100644 --- a/docs/ru/sql-reference/functions/url-functions.md +++ b/docs/ru/sql-reference/functions/url-functions.md @@ -115,6 +115,168 @@ SELECT topLevelDomain('svn+ssh://www.some.svn-hosting.com:80/repo/trunk') Например, `cutToFirstSignificantSubdomain('https://news.yandex.com.tr/') = 'yandex.com.tr'`. +### cutToFirstSignificantSubdomainCustom {#cuttofirstsignificantsubdomaincustom} + +Возвращает часть домена, включающую поддомены верхнего уровня до первого существенного поддомена. Принимает имя пользовательского [списка доменов верхнего уровня](https://ru.wikipedia.org/wiki/Список_доменов_верхнего_уровня). + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml + + + + public_suffix_list.dat + + +``` + +**Синтаксис** + +``` sql +cutToFirstSignificantSubdomain(URL, TLD) +``` + +**Parameters** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Часть домена, включающая поддомены верхнего уровня до первого существенного поддомена. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Результат: + +```text +┌─cutToFirstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo.there-is-no-such-domain │ +└───────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + +### cutToFirstSignificantSubdomainCustomWithWWW {#cuttofirstsignificantsubdomaincustomwithwww} + +Возвращает часть домена, включающую поддомены верхнего уровня до первого существенного поддомена, не опуская "www". Принимает имя пользовательского списка доменов верхнего уровня. + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml + + + + public_suffix_list.dat + + +``` + +**Синтаксис** + +```sql +cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) +``` + +**Параметры** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Часть домена, включающая поддомены верхнего уровня до первого существенного поддомена, без удаления `www`. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list'); +``` + +Результат: + +```text +┌─cutToFirstSignificantSubdomainCustomWithWWW('www.foo', 'public_suffix_list')─┐ +│ www.foo │ +└──────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + +### firstSignificantSubdomainCustom {#firstsignificantsubdomaincustom} + +Возвращает первый существенный поддомен. 
Принимает имя пользовательского списка доменов верхнего уровня. + +Полезно, если требуется актуальный список доменов верхнего уровня или если есть пользовательский. + +Пример конфигурации: + +```xml + + + + public_suffix_list.dat + + +``` + +**Синтаксис** + +```sql +firstSignificantSubdomainCustom(URL, TLD) +``` + +**Параметры** + +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — имя пользовательского списка доменов верхнего уровня. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +- Первый существенный поддомен. + +Тип: [String](../../sql-reference/data-types/string.md). + +**Пример** + +Запрос: + +```sql +SELECT firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list'); +``` + +Результат: + +```text +┌─firstSignificantSubdomainCustom('bar.foo.there-is-no-such-domain', 'public_suffix_list')─┐ +│ foo │ +└──────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**Смотрите также** + +- [firstSignificantSubdomain](#firstsignificantsubdomain). + ### port(URL[, default_port = 0]) {#port} Возвращает порт или значение `default_port`, если в URL-адресе нет порта (или передан невалидный URL) diff --git a/docs/ru/sql-reference/statements/alter/quota.md b/docs/ru/sql-reference/statements/alter/quota.md index 707f56e7cd4..0bdac1381da 100644 --- a/docs/ru/sql-reference/statements/alter/quota.md +++ b/docs/ru/sql-reference/statements/alter/quota.md @@ -5,18 +5,38 @@ toc_title: QUOTA # ALTER QUOTA {#alter-quota-statement} -Изменяет квоту. +Изменяет [квоту](../../../operations/access-rights.md#quotas-management). -## Синтаксис {#alter-quota-syntax} +Синтаксис: ``` sql ALTER QUOTA [IF EXISTS] name [ON CLUSTER cluster_name] [RENAME TO new_name] - [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] - [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR} - {MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] | + [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED] + [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year} + {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | NO LIMITS | TRACKING ONLY} [,...]] [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` -[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/quota/) \ No newline at end of file +Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md). + +Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). + +В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md). 
+ +**Примеры** + +Ограничить для текущего пользователя максимальное число запросов — не более 123 запросов за каждые 15 месяцев: + +``` sql +ALTER QUOTA IF EXISTS qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER; +``` + +Ограничить по умолчанию максимальное время выполнения запроса — не более полсекунды за каждые 30 минут, а также максимальное число запросов — не более 321 и максимальное число ошибок — не более 10 за каждые 5 кварталов: + +``` sql +ALTER QUOTA IF EXISTS qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default; +``` + +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/alter/quota/) diff --git a/docs/ru/sql-reference/statements/create/quota.md b/docs/ru/sql-reference/statements/create/quota.md index fe18869bf2e..65762071ea2 100644 --- a/docs/ru/sql-reference/statements/create/quota.md +++ b/docs/ru/sql-reference/statements/create/quota.md @@ -7,23 +7,34 @@ toc_title: "\u041a\u0432\u043e\u0442\u0430" Создает [квоту](../../../operations/access-rights.md#quotas-management), которая может быть присвоена пользователю или роли. -### Синтаксис {#create-quota-syntax} +Синтаксис: ``` sql CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name] - [KEYED BY {'none' | 'user name' | 'ip address' | 'client key' | 'client key or user name' | 'client key or ip address'}] - [FOR [RANDOMIZED] INTERVAL number {SECOND | MINUTE | HOUR | DAY | WEEK | MONTH | QUARTER | YEAR} - {MAX { {QUERIES | ERRORS | RESULT ROWS | RESULT BYTES | READ ROWS | READ BYTES | EXECUTION TIME} = number } [,...] | + [KEYED BY {user_name | ip_address | client_key | client_key, user_name | client_key, ip_address} | NOT KEYED] + [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year} + {MAX { {queries | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] | NO LIMITS | TRACKING ONLY} [,...]] [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] ``` +Ключи `user_name`, `ip_address`, `client_key`, `client_key, user_name` и `client_key, ip_address` соответствуют полям таблицы [system.quotas](../../../operations/system-tables/quotas.md). -### Пример {#create-quota-example} +Параметры `queries`, `errors`, `result_rows`, `result_bytes`, `read_rows`, `read_bytes`, `execution_time` соответствуют полям таблицы [system.quotas_usage](../../../operations/system-tables/quotas_usage.md). -Ограничить максимальное количество запросов для текущего пользователя до 123 запросов каждые 15 месяцев: +В секции `ON CLUSTER` можно указать кластеры, на которых создается квота, см. [Распределенные DDL запросы](../../../sql-reference/distributed-ddl.md). 
+ +**Примеры** + +Ограничить максимальное количество запросов для текущего пользователя — не более 123 запросов за каждые 15 месяцев: ``` sql -CREATE QUOTA qA FOR INTERVAL 15 MONTH MAX QUERIES 123 TO CURRENT_USER +CREATE QUOTA qA FOR INTERVAL 15 month MAX queries = 123 TO CURRENT_USER; +``` + +Ограничить по умолчанию максимальное время выполнения запроса — не более полсекунды за каждые 30 минут, а также максимальное число запросов — не более 321 и максимальное число ошибок — не более 10 за каждые 5 кварталов: + +``` sql +CREATE QUOTA qB FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default; ``` [Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/quota) diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index 09026874948..f4b91b5ae17 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -56,9 +56,10 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na Недоработано выполнение запросов `ALTER` над материализованными представлениями, поэтому они могут быть неудобными для использования. Если материализованное представление использует конструкцию `TO [db.]name`, то можно выполнить `DETACH` представления, `ALTER` для целевой таблицы и последующий `ATTACH` ранее отсоединенного (`DETACH`) представления. +Обратите внимание, что работа материлизованного представления находится под влиянием настройки [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert). Перед вставкой данных в таблицу происходит их слияние. + Представления выглядят так же, как обычные таблицы. Например, они перечисляются в результате запроса `SHOW TABLES`. Отсутствует отдельный запрос для удаления представлений. Чтобы удалить представление, следует использовать `DROP TABLE`. -[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view) - +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view) diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index d83f6691f6b..0ad85ed0166 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -13,7 +13,7 @@ toc_title: INSERT INTO INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` -Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как `APPLY`, `EXCEPT`, `REPLACE`. +Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). В качестве примера рассмотрим таблицу: @@ -63,8 +63,6 @@ SELECT * FROM insert_select_testtable - Значения, вычисляемые из `DEFAULT` выражений, указанных в определении таблицы. - Нули и пустые строки, если `DEFAULT` не определены. 
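
A minimal sketch of the default-filling behaviour listed above; the table name, engine and column definitions are invented for the illustration.

``` sql
-- Illustrative sketch: `b` is omitted from the column list and is filled from its DEFAULT expression;
-- `c` has no DEFAULT, so it receives an empty string.
CREATE TABLE insert_defaults_demo
(
    a Int32,
    b String DEFAULT 'unknown',
    c String
)
ENGINE = Memory;

INSERT INTO insert_defaults_demo (a) VALUES (1);

SELECT * FROM insert_defaults_demo;
-- ┌─a─┬─b───────┬─c─┐
-- │ 1 │ unknown │   │
-- └───┴─────────┴───┘
```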
-Если [strict_insert_defaults=1](../../operations/settings/settings.md), то столбцы, для которых не определены `DEFAULT`, необходимо перечислить в запросе. - В INSERT можно передавать данные любого [формата](../../interfaces/formats.md#formats), который поддерживает ClickHouse. Для этого формат необходимо указать в запросе в явном виде: ``` sql diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md index c37e82ae0be..b0b6e80d7be 100644 --- a/docs/ru/sql-reference/statements/select/index.md +++ b/docs/ru/sql-reference/statements/select/index.md @@ -162,6 +162,112 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of Подробнее смотрите в разделе «Настройки». Присутствует возможность использовать внешнюю сортировку (с сохранением временных данных на диск) и внешнюю агрегацию. +## Модификаторы запроса SELECT {#select-modifiers} + +Вы можете использовать следующие модификаторы в запросах `SELECT`. + +### APPLY {#apply-modifier} + +Вызывает указанную функцию для каждой строки, возвращаемой внешним табличным выражением запроса. + +**Синтаксис:** + +``` sql +SELECT APPLY( ) FROM [db.]table_name +``` + +**Пример:** + +``` sql +CREATE TABLE columns_transformers (i Int64, j Int16, k Int64) ENGINE = MergeTree ORDER by (i); +INSERT INTO columns_transformers VALUES (100, 10, 324), (120, 8, 23); +SELECT * APPLY(sum) FROM columns_transformers; +``` + +``` +┌─sum(i)─┬─sum(j)─┬─sum(k)─┐ +│ 220 │ 18 │ 347 │ +└────────┴────────┴────────┘ +``` + +### EXCEPT {#except-modifier} + +Исключает из результата запроса один или несколько столбцов. + +**Синтаксис:** + +``` sql +SELECT EXCEPT ( col_name1 [, col_name2, col_name3, ...] ) FROM [db.]table_name +``` + +**Пример:** + +``` sql +SELECT * EXCEPT (i) from columns_transformers; +``` + +``` +┌──j─┬───k─┐ +│ 10 │ 324 │ +│ 8 │ 23 │ +└────┴─────┘ +``` + +### REPLACE {#replace-modifier} + +Определяет одно или несколько [выражений алиасов](../../../sql-reference/syntax.md#syntax-expression_aliases). Каждый алиас должен соответствовать имени столбца из запроса `SELECT *`. В списке столбцов результата запроса имя столбца, соответствующее алиасу, заменяется выражением в модификаторе `REPLACE`. + +Этот модификатор не изменяет имена или порядок столбцов. Однако он может изменить значение и тип значения. + +**Синтаксис:** + +``` sql +SELECT REPLACE( AS col_name) from [db.]table_name +``` + +**Пример:** + +``` sql +SELECT * REPLACE(i + 1 AS i) from columns_transformers; +``` + +``` +┌───i─┬──j─┬───k─┐ +│ 101 │ 10 │ 324 │ +│ 121 │ 8 │ 23 │ +└─────┴────┴─────┘ +``` + +### Комбинации модификаторов {#modifier-combinations} + +Вы можете использовать каждый модификатор отдельно или комбинировать их. + +**Примеры:** + +Использование одного и того же модификатора несколько раз. + +``` sql +SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) from columns_transformers; +``` + +``` +┌─max(length(toString(j)))─┬─max(length(toString(k)))─┐ +│ 2 │ 3 │ +└──────────────────────────┴──────────────────────────┘ +``` + +Использование нескольких модификаторов в одном запросе. + +``` sql +SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers; +``` + +``` +┌─sum(plus(i, 1))─┬─sum(k)─┐ +│ 222 │ 347 │ +└─────────────────┴────────┘ +``` + ## SETTINGS в запросе SELECT {#settings-in-select} Вы можете задать значения необходимых настроек непосредственно в запросе `SELECT` в секции `SETTINGS`. 
Эти настройки действуют только в рамках данного запроса, а после его выполнения сбрасываются до предыдущего значения или значения по умолчанию. @@ -174,5 +280,4 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of SELECT * FROM some_table SETTINGS optimize_read_in_order=1, cast_keep_nullable=1; ``` -[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/) - +[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/) diff --git a/docs/zh/development/style.md b/docs/zh/development/style.md index 8f104e3a7d8..c8e883920dd 100644 --- a/docs/zh/development/style.md +++ b/docs/zh/development/style.md @@ -118,7 +118,7 @@ for (auto & stream : streams) stream.second->finalize(); ``` -**18.** 行的某尾不应该包含空格。 +**18.** 行的末尾不应该包含空格。 **19.** 源文件应该用 UTF-8 编码。 diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index 2fffcbe7ef3..353dd5f5bc8 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -401,7 +401,7 @@ TTL date_time + INTERVAL 15 HOUR ### 列 TTL {#mergetree-column-ttl} -当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期,则ClickHouse 会从文件系统中的数据片段中此列。 +当列中的值过期时, ClickHouse会将它们替换成该列数据类型的默认值。如果数据片段中列的所有值均已过期,则ClickHouse 会从文件系统中的数据片段中删除此列。 `TTL`子句不能被用于主键字段。 diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 7a0a42fa47c..3b89da9f595 100644 --- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -37,7 +37,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] VersionedCollapsingMergeTree(sign, version) ``` -- `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 划 +- `sign` — 指定行类型的列名: `1` 是一个 “state” 行, `-1` 是一个 “cancel” 行 列数据类型应为 `Int8`. diff --git a/docs/zh/operations/backup.md b/docs/zh/operations/backup.md index 72491bb53ff..1b1993e3ae6 100644 --- a/docs/zh/operations/backup.md +++ b/docs/zh/operations/backup.md @@ -7,7 +7,7 @@ toc_title: "\u6570\u636E\u5907\u4EFD" # 数据备份 {#data-backup} -尽管[副本](../engines/table-engines/mergetree-family/replication.md) 可以预防硬件错误带来的数据丢失, 但是它不能防止人为操作的错误: 意外删除数据, 删除错误的 table 或者删除错误 cluster 上的 table, 可以导致错误数据处理错误或者数据损坏的 bugs. 这类意外可能会影响所有的副本. ClickHouse 有内建的保障措施可以预防一些错误 — 例如, 默认情况下[您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). 但是,这些保障措施不能涵盖所有可能的情况,并且可以规避。 +尽管[副本](../engines/table-engines/mergetree-family/replication.md) 可以预防硬件错误带来的数据丢失, 但是它不能防止人为操作的错误: 意外删除数据, 删除错误的 table 或者删除错误 cluster 上的 table, 可以导致错误数据处理错误或者数据损坏的 bugs. 这类意外可能会影响所有的副本. ClickHouse 有内建的保障措施可以预防一些错误 — 例如, 默认情况下[您不能使用类似MergeTree的引擎删除包含超过50Gb数据的表](server-configuration-parameters/settings.md#max-table-size-to-drop). 但是,这些保障措施不能涵盖所有可能的情况,并且可以规避。 为了有效地减少可能的人为错误,您应该 **提前**准备备份和还原数据的策略. 
diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index f834ab74f5a..64625c19c6a 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -1310,3 +1310,14 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1; **另请参阅** - [IN 运算符中的 NULL 处理](../../sql-reference/operators/in.md#in-null-processing) + +## max_final_threads {#max-final-threads} + +设置使用[FINAL](../../sql-reference/statements/select/from.md#select-from-final) 限定符的`SELECT`查询, 在数据读取阶段的最大并发线程数。 + +可能的值: + +- 正整数。 +- 0 or 1 — 禁用。 此时`SELECT` 查询单线程执行。 + +默认值: `16`。 diff --git a/docs/zh/operations/system-tables/zookeeper.md b/docs/zh/operations/system-tables/zookeeper.md index b66e5262df3..f7e816ccee6 100644 --- a/docs/zh/operations/system-tables/zookeeper.md +++ b/docs/zh/operations/system-tables/zookeeper.md @@ -6,12 +6,16 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 # 系统。动物园管理员 {#system-zookeeper} 如果未配置ZooKeeper,则表不存在。 允许从配置中定义的ZooKeeper集群读取数据。 -查询必须具有 ‘path’ WHERE子句中的平等条件。 这是ZooKeeper中您想要获取数据的孩子的路径。 +查询必须具有 ‘path’ WHERE子句中的相等条件或者在某个集合中的条件。 这是ZooKeeper中您想要获取数据的孩子的路径。 查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出对所有孩子的数据 `/clickhouse` 节点。 要输出所有根节点的数据,write path= ‘/’. 如果在指定的路径 ‘path’ 不存在,将引发异常。 +查询`SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` 输出`/` 和 `/clickhouse`节点上所有子节点的数据。 +如果在指定的 ‘path’ 集合中有不存在的路径,将引发异常。 +它可以用来做一批ZooKeeper路径查询。 + 列: - `name` (String) — The name of the node. diff --git a/docs/zh/sql-reference/aggregate-functions/index.md b/docs/zh/sql-reference/aggregate-functions/index.md index 436a8f433ea..2344c3e6dc0 100644 --- a/docs/zh/sql-reference/aggregate-functions/index.md +++ b/docs/zh/sql-reference/aggregate-functions/index.md @@ -1,11 +1,12 @@ --- +toc_folder_title: 聚合函数 toc_priority: 33 -toc_title: 聚合函数 +toc_title: 简介 --- # 聚合函数 {#aggregate-functions} -聚合函数在 [正常](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) 方式如预期的数据库专家。 +聚合函数如数据库专家预期的方式 [正常](http://www.sql-tutorial.com/sql-aggregate-functions-sql-tutorial) 工作。 ClickHouse还支持: @@ -14,7 +15,7 @@ ClickHouse还支持: ## 空处理 {#null-processing} -在聚合过程中,所有 `NULL`s被跳过。 +在聚合过程中,所有 `NULL` 被跳过。 **例:** @@ -30,7 +31,7 @@ ClickHouse还支持: └───┴──────┘ ``` -比方说,你需要在总的值 `y` 列: +比方说,你需要计算 `y` 列的总数: ``` sql SELECT sum(y) FROM t_null_big @@ -40,9 +41,8 @@ SELECT sum(y) FROM t_null_big │ 7 │ └────────┘ -该 `sum` 函数解释 `NULL` 作为 `0`. 特别是,这意味着,如果函数接收输入的选择,其中所有的值 `NULL`,那么结果将是 `0`,不 `NULL`. 
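A minimal sketch of the claim above, assuming the `t_null_big` table shown earlier in this section: when `sum` receives only `NULL` values, the result is `0`, not `NULL`.

``` sql
-- Only rows where y is NULL are selected; sum() skips NULL inputs and returns 0 rather than NULL.
SELECT sum(y) FROM t_null_big WHERE y IS NULL;
```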
-现在你可以使用 `groupArray` 函数从创建一个数组 `y` 列: +现在你可以使用 `groupArray` 函数用 `y` 列创建一个数组: ``` sql SELECT groupArray(y) FROM t_null_big @@ -54,6 +54,6 @@ SELECT groupArray(y) FROM t_null_big └───────────────┘ ``` -`groupArray` 不包括 `NULL` 在生成的数组中。 +在 `groupArray` 生成的数组中不包括 `NULL`。 [原始文章](https://clickhouse.tech/docs/en/query_language/agg_functions/) diff --git a/docs/zh/sql-reference/aggregate-functions/reference.md b/docs/zh/sql-reference/aggregate-functions/reference.md index cf7dddb9b7e..3a224886a00 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference.md +++ b/docs/zh/sql-reference/aggregate-functions/reference.md @@ -1,9 +1,9 @@ --- toc_priority: 36 -toc_title: 聚合函数 +toc_title: 参考手册 --- -# 聚合函数引用 {#aggregate-functions-reference} +# 参考手册 {#aggregate-functions-reference} ## count {#agg_function-count} diff --git a/docs/zh/sql-reference/statements/select/from.md b/docs/zh/sql-reference/statements/select/from.md index a8b49febab5..71b7cd319eb 100644 --- a/docs/zh/sql-reference/statements/select/from.md +++ b/docs/zh/sql-reference/statements/select/from.md @@ -25,11 +25,13 @@ toc_title: FROM - [Replicated](../../../engines/table-engines/mergetree-family/replication.md) 版本 `MergeTree` 引擎 - [View](../../../engines/table-engines/special/view.md), [Buffer](../../../engines/table-engines/special/buffer.md), [Distributed](../../../engines/table-engines/special/distributed.md),和 [MaterializedView](../../../engines/table-engines/special/materializedview.md) 在其他引擎上运行的引擎,只要是它们底层是 `MergeTree`-引擎表即可。 +现在使用 `FINAL` 修饰符 的 `SELECT` 查询启用了并发执行, 这会快一点。但是仍然存在缺陷 (见下)。 [max_final_threads](../../../operations/settings/settings.md#max-final-threads) 设置使用的最大线程数限制。 + ### 缺点 {#drawbacks} -使用的查询 `FINAL` 执行速度不如类似的查询那么快,因为: +使用的查询 `FINAL` 执行速度比类似的查询慢一点,因为: -- 查询在单个线程中执行,并在查询执行期间合并数据。 +- 在查询执行期间合并数据。 - 查询与 `FINAL` 除了读取查询中指定的列之外,还读取主键列。 **在大多数情况下,避免使用 `FINAL`.** 常见的方法是使用假设后台进程的不同查询 `MergeTree` 引擎还没有发生,并通过应用聚合(例如,丢弃重复项)来处理它。 {## TODO: examples ##} diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index ae1d16ce402..a0e2ea155ba 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -62,12 +62,12 @@ public: bool randomize_, size_t max_iterations_, double max_time_, const String & json_path_, size_t confidence_, const String & query_id_, const String & query_to_execute_, bool continue_on_errors_, - bool print_stacktrace_, const Settings & settings_) + bool reconnect_, bool print_stacktrace_, const Settings & settings_) : concurrency(concurrency_), delay(delay_), queue(concurrency), randomize(randomize_), cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_), json_path(json_path_), confidence(confidence_), query_id(query_id_), - query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_), + query_to_execute(query_to_execute_), continue_on_errors(continue_on_errors_), reconnect(reconnect_), print_stacktrace(print_stacktrace_), settings(settings_), shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())), pool(concurrency) @@ -155,6 +155,7 @@ private: String query_id; String query_to_execute; bool continue_on_errors; + bool reconnect; bool print_stacktrace; const Settings & settings; SharedContextHolder shared_context; @@ -404,9 +405,14 @@ private: void execute(EntryPtrs & connection_entries, Query & query, size_t connection_index) { Stopwatch watch; + + Connection & connection = **connection_entries[connection_index]; + + if (reconnect) + connection.disconnect(); + 
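+        /// Editorial note (assumption, not part of the original patch): the connection is
+        /// re-established lazily by the next query, so with --reconnect every measured query
+        /// also pays the cost of connection setup and authentication.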
RemoteBlockInputStream stream( - *(*connection_entries[connection_index]), - query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage); + connection, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage); if (!query_id.empty()) stream.setQueryId(query_id); @@ -589,6 +595,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) ("confidence", value()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)") ("query_id", value()->default_value(""), "") ("continue_on_errors", "continue testing even if a query fails") + ("reconnect", "establish new connection for every query") ; Settings settings; @@ -638,7 +645,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options["confidence"].as(), options["query_id"].as(), options["query"].as(), - options.count("continue_on_errors") > 0, + options.count("continue_on_errors"), + options.count("reconnect"), print_stacktrace, settings); return benchmark.run(); diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 06bd7d84526..3c27908741c 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -932,6 +932,10 @@ private: std::cerr << "Received exception from server (version " << server_version << "):" << std::endl << "Code: " << server_exception->code() << ". " << text << std::endl; + if (is_interactive) + { + std::cerr << std::endl; + } } if (client_exception) @@ -939,6 +943,10 @@ private: fmt::print(stderr, "Error on processing query '{}':\n{}\n", full_query, client_exception->message()); + if (is_interactive) + { + fmt::print(stderr, "\n"); + } } // A debug check -- at least some exception must be set, if the error @@ -1366,7 +1374,30 @@ private: { // Probably the server is dead because we found an assertion // failure. Fail fast. - fmt::print(stderr, "Lost connection to the server\n"); + fmt::print(stderr, "Lost connection to the server.\n"); + + // Print the changed settings because they might be needed to + // reproduce the error. + const auto & changes = context.getSettingsRef().changes(); + if (!changes.empty()) + { + fmt::print(stderr, "Changed settings: "); + for (size_t i = 0; i < changes.size(); ++i) + { + if (i) + { + fmt::print(stderr, ", "); + } + fmt::print(stderr, "{} = '{}'", changes[i].name, + toString(changes[i].value)); + } + fmt::print(stderr, "\n"); + } + else + { + fmt::print(stderr, "No changed settings.\n"); + } + return false; } @@ -1711,7 +1742,7 @@ private: } // Remember where the data ended. We use this info later to determine // where the next query begins. - parsed_insert_query->end = data_in.buffer().begin() + data_in.count(); + parsed_insert_query->end = parsed_insert_query->data + data_in.count(); } else if (!is_interactive) { @@ -1892,6 +1923,9 @@ private: switch (packet.type) { + case Protocol::Server::PartUUIDs: + return true; + case Protocol::Server::Data: if (!cancelled) onData(packet.block); diff --git a/programs/client/QueryFuzzer.cpp b/programs/client/QueryFuzzer.cpp index ae0de450a10..8d8d8daaf39 100644 --- a/programs/client/QueryFuzzer.cpp +++ b/programs/client/QueryFuzzer.cpp @@ -325,6 +325,61 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) // the generic recursion into IAST.children. } +void QueryFuzzer::fuzzWindowFrame(WindowFrame & frame) +{ + switch (fuzz_rand() % 40) + { + case 0: + { + const auto r = fuzz_rand() % 3; + frame.type = r == 0 ? WindowFrame::FrameType::Rows + : r == 1 ? 
WindowFrame::FrameType::Range + : WindowFrame::FrameType::Groups; + break; + } + case 1: + { + const auto r = fuzz_rand() % 3; + frame.begin_type = r == 0 ? WindowFrame::BoundaryType::Unbounded + : r == 1 ? WindowFrame::BoundaryType::Current + : WindowFrame::BoundaryType::Offset; + break; + } + case 2: + { + const auto r = fuzz_rand() % 3; + frame.end_type = r == 0 ? WindowFrame::BoundaryType::Unbounded + : r == 1 ? WindowFrame::BoundaryType::Current + : WindowFrame::BoundaryType::Offset; + break; + } + case 3: + { + frame.begin_offset = getRandomField(0).get(); + break; + } + case 4: + { + frame.end_offset = getRandomField(0).get(); + break; + } + case 5: + { + frame.begin_preceding = fuzz_rand() % 2; + break; + } + case 6: + { + frame.end_preceding = fuzz_rand() % 2; + break; + } + default: + break; + } + + frame.is_default = (frame == WindowFrame{}); +} + void QueryFuzzer::fuzz(ASTs & asts) { for (auto & ast : asts) @@ -409,6 +464,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast) auto & def = fn->window_definition->as(); fuzzColumnLikeExpressionList(def.partition_by.get()); fuzzOrderByList(def.order_by.get()); + fuzzWindowFrame(def.frame); } fuzz(fn->children); @@ -421,6 +477,23 @@ void QueryFuzzer::fuzz(ASTPtr & ast) fuzz(select->children); } + /* + * The time to fuzz the settings has not yet come. + * Apparently we don't have any infractructure to validate the values of + * the settings, and the first query with max_block_size = -1 breaks + * because of overflows here and there. + *//* + * else if (auto * set = typeid_cast(ast.get())) + * { + * for (auto & c : set->changes) + * { + * if (fuzz_rand() % 50 == 0) + * { + * c.value = fuzzField(c.value); + * } + * } + * } + */ else if (auto * literal = typeid_cast(ast.get())) { // There is a caveat with fuzzing the children: many ASTs also keep the diff --git a/programs/client/QueryFuzzer.h b/programs/client/QueryFuzzer.h index e9d3f150283..38714205967 100644 --- a/programs/client/QueryFuzzer.h +++ b/programs/client/QueryFuzzer.h @@ -14,6 +14,7 @@ namespace DB class ASTExpressionList; class ASTOrderByElement; +struct WindowFrame; /* * This is an AST-based query fuzzer that makes random modifications to query @@ -65,6 +66,7 @@ struct QueryFuzzer void fuzzOrderByElement(ASTOrderByElement * elem); void fuzzOrderByList(IAST * ast); void fuzzColumnLikeExpressionList(IAST * ast); + void fuzzWindowFrame(WindowFrame & frame); void fuzz(ASTs & asts); void fuzz(ASTPtr & ast); void collectFuzzInfoMain(const ASTPtr ast); diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp index 87083c2c27b..dfa7048349e 100644 --- a/programs/client/Suggest.cpp +++ b/programs/client/Suggest.cpp @@ -1,5 +1,6 @@ #include "Suggest.h" +#include #include #include @@ -86,6 +87,9 @@ Suggest::Suggest() void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit) { + /// NOTE: Once you will update the completion list, + /// do not forget to update 01676_clickhouse_client_autocomplete.sh + std::stringstream query; // STYLE_CHECK_ALLOW_STD_STRING_STREAM query << "SELECT DISTINCT arrayJoin(extractAll(name, '[\\\\w_]{2,}')) AS res FROM (" "SELECT name FROM system.functions" @@ -104,6 +108,18 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo " UNION ALL " "SELECT cluster FROM system.clusters" " UNION ALL " + "SELECT name FROM system.errors" + " UNION ALL " + "SELECT event FROM system.events" + " UNION ALL " + "SELECT metric FROM system.asynchronous_metrics" + " UNION ALL " + "SELECT metric 
FROM system.metrics" + " UNION ALL " + "SELECT macro FROM system.macros" + " UNION ALL " + "SELECT policy_name FROM system.storage_policies" + " UNION ALL " "SELECT concat(func.name, comb.name) FROM system.functions AS func CROSS JOIN system.aggregate_function_combinators AS comb WHERE is_aggregate"; /// The user may disable loading of databases, tables, columns by setting suggestion_limit to zero. @@ -123,12 +139,17 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo query << ") WHERE notEmpty(res)"; - fetch(connection, timeouts, query.str()); + Settings settings; + /// To show all rows from: + /// - system.errors + /// - system.events + settings.system_events_show_zero_values = true; + fetch(connection, timeouts, query.str(), settings); } -void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query) +void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings) { - connection.sendQuery(timeouts, query); + connection.sendQuery(timeouts, query, "" /* query_id */, QueryProcessingStage::Complete, &settings); while (true) { diff --git a/programs/client/Suggest.h b/programs/client/Suggest.h index 03332088cbe..0049bc08ebf 100644 --- a/programs/client/Suggest.h +++ b/programs/client/Suggest.h @@ -33,7 +33,7 @@ public: private: void loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit); - void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query); + void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings); void fillWordsFromBlock(const Block & block); /// Words are fetched asynchronously. diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml index c073ab38aea..66e7afd8f8c 100644 --- a/programs/client/clickhouse-client.xml +++ b/programs/client/clickhouse-client.xml @@ -29,4 +29,25 @@ {display_name} \x01\e[1;32m\x02:)\x01\e[0m\x02 {display_name} \x01\e[1;31m\x02:)\x01\e[0m\x02 + + diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index ca09e7c1889..7eea23160b2 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -316,9 +316,6 @@ void ClusterCopier::process(const ConnectionTimeouts & timeouts) } } - /// Delete helping tables in both cases (whole table is done or not) - dropHelpingTables(task_table); - if (!table_is_done) { throw Exception("Too many tries to process table " + task_table.table_id + ". Abort remaining execution", @@ -642,7 +639,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t query_deduplicate_ast_string += " OPTIMIZE TABLE " + getQuotedTable(original_table) + ((partition_name == "'all'") ? 
" PARTITION ID " : " PARTITION ") + partition_name + " DEDUPLICATE;"; - LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_alter_ast_string); + LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_deduplicate_ast_string); UInt64 num_nodes = executeQueryOnCluster( task_table.cluster_push, @@ -1044,6 +1041,11 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab { LOG_INFO(log, "Table {} is not processed yet.Copied {} of {}, will retry", task_table.table_id, finished_partitions, required_partitions); } + else + { + /// Delete helping tables in case that whole table is done + dropHelpingTables(task_table); + } return table_is_done; } diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index ee4daa3e16d..14fa734f246 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -4,14 +4,14 @@ # include # include -# include +# include # include # include # include # include # include # include -# include +# include # include # include # include @@ -59,16 +59,16 @@ namespace } } -void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - Poco::Net::HTMLForm params(request, request.stream()); + HTMLForm params(request, request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); auto process_error = [&response, this](const std::string & message) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - response.send() << message << std::endl; + *response.send() << message << std::endl; LOG_WARNING(log, message); }; @@ -159,8 +159,16 @@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques columns.emplace_back(reinterpret_cast(column_name), std::move(column_type)); } - WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); - writeStringBinary(columns.toString(), out); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + try + { + writeStringBinary(columns.toString(), out); + out.finalize(); + } + catch (...) + { + out.finalize(); + } } catch (...) { diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index 04b4c06693b..9b5b470b31d 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -3,10 +3,11 @@ #if USE_ODBC # include -# include -# include +# include # include +# include + /** The structure of the table is taken from the query "SELECT * FROM table WHERE 1=0". * TODO: It would be much better to utilize ODBC methods dedicated for columns description. * If there is no such table, an exception is thrown. 
@@ -14,7 +15,7 @@ namespace DB { -class ODBCColumnsInfoHandler : public Poco::Net::HTTPRequestHandler +class ODBCColumnsInfoHandler : public HTTPRequestHandler { public: ODBCColumnsInfoHandler(size_t keep_alive_timeout_, Context & context_) @@ -22,7 +23,7 @@ public: { } - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: Poco::Logger * log; diff --git a/programs/odbc-bridge/HandlerFactory.cpp b/programs/odbc-bridge/HandlerFactory.cpp index 0cc40480b87..9ac48af4ace 100644 --- a/programs/odbc-bridge/HandlerFactory.cpp +++ b/programs/odbc-bridge/HandlerFactory.cpp @@ -7,39 +7,40 @@ namespace DB { -Poco::Net::HTTPRequestHandler * HandlerFactory::createRequestHandler(const Poco::Net::HTTPServerRequest & request) + +std::unique_ptr HandlerFactory::createRequestHandler(const HTTPServerRequest & request) { Poco::URI uri{request.getURI()}; LOG_TRACE(log, "Request URI: {}", uri.toString()); if (uri.getPath() == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET) - return new PingHandler(keep_alive_timeout); + return std::make_unique(keep_alive_timeout); if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST) { if (uri.getPath() == "/columns_info") #if USE_ODBC - return new ODBCColumnsInfoHandler(keep_alive_timeout, context); + return std::make_unique(keep_alive_timeout, context); #else return nullptr; #endif else if (uri.getPath() == "/identifier_quote") #if USE_ODBC - return new IdentifierQuoteHandler(keep_alive_timeout, context); + return std::make_unique(keep_alive_timeout, context); #else return nullptr; #endif else if (uri.getPath() == "/schema_allowed") #if USE_ODBC - return new SchemaAllowedHandler(keep_alive_timeout, context); + return std::make_unique(keep_alive_timeout, context); #else return nullptr; #endif else if (uri.getPath() == "/write") - return new ODBCHandler(pool_map, keep_alive_timeout, context, "write"); + return std::make_unique(pool_map, keep_alive_timeout, context, "write"); else - return new ODBCHandler(pool_map, keep_alive_timeout, context, "read"); + return std::make_unique(pool_map, keep_alive_timeout, context, "read"); } return nullptr; } diff --git a/programs/odbc-bridge/HandlerFactory.h b/programs/odbc-bridge/HandlerFactory.h index 1d4edfc9dd1..5dce6f02ecd 100644 --- a/programs/odbc-bridge/HandlerFactory.h +++ b/programs/odbc-bridge/HandlerFactory.h @@ -1,16 +1,17 @@ #pragma once + #include -#include -#include -#include -#include "MainHandler.h" +#include #include "ColumnInfoHandler.h" #include "IdentifierQuoteHandler.h" +#include "MainHandler.h" #include "SchemaAllowedHandler.h" +#include + #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" - #include +#include #pragma GCC diagnostic pop @@ -19,7 +20,7 @@ namespace DB /** Factory for '/ping', '/', '/columns_info', '/identifier_quote', '/schema_allowed' handlers. 
* Also stores Session pools for ODBC connections */ -class HandlerFactory : public Poco::Net::HTTPRequestHandlerFactory +class HandlerFactory : public HTTPRequestHandlerFactory { public: HandlerFactory(const std::string & name_, size_t keep_alive_timeout_, Context & context_) @@ -28,7 +29,7 @@ public: pool_map = std::make_shared(); } - Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest & request) override; + std::unique_ptr createRequestHandler(const HTTPServerRequest & request) override; private: Poco::Logger * log; diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp index 2c3701cfff9..5060d37c479 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -3,14 +3,14 @@ #if USE_ODBC # include -# include +# include +# include # include # include # include # include # include # include -# include # include # include # include @@ -22,16 +22,16 @@ namespace DB { -void IdentifierQuoteHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - Poco::Net::HTMLForm params(request, request.stream()); + HTMLForm params(request, request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); auto process_error = [&response, this](const std::string & message) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - response.send() << message << std::endl; + *response.send() << message << std::endl; LOG_WARNING(log, message); }; @@ -49,8 +49,16 @@ void IdentifierQuoteHandler::handleRequest(Poco::Net::HTTPServerRequest & reques auto identifier = getIdentifierQuote(hdbc); - WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); - writeStringBinary(identifier, out); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + try + { + writeStringBinary(identifier, out); + out.finalize(); + } + catch (...) + { + out.finalize(); + } } catch (...) 
{ diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.h b/programs/odbc-bridge/IdentifierQuoteHandler.h index fd357e32786..dad88c72ad8 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.h +++ b/programs/odbc-bridge/IdentifierQuoteHandler.h @@ -1,8 +1,9 @@ #pragma once #include +#include + #include -#include #if USE_ODBC @@ -10,7 +11,7 @@ namespace DB { -class IdentifierQuoteHandler : public Poco::Net::HTTPRequestHandler +class IdentifierQuoteHandler : public HTTPRequestHandler { public: IdentifierQuoteHandler(size_t keep_alive_timeout_, Context &) @@ -18,7 +19,7 @@ public: { } - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: Poco::Logger * log; diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index 64cb7bc0b46..4fcc9deea6a 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -73,19 +74,19 @@ ODBCHandler::PoolPtr ODBCHandler::getPool(const std::string & connection_str) return pool_map->at(connection_str); } -void ODBCHandler::processError(Poco::Net::HTTPServerResponse & response, const std::string & message) +void ODBCHandler::processError(HTTPServerResponse & response, const std::string & message) { - response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); + response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - response.send() << message << std::endl; + *response.send() << message << std::endl; LOG_WARNING(log, message); } -void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - Poco::Net::HTMLForm params(request); + HTMLForm params(request); if (mode == "read") - params.read(request.stream()); + params.read(request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); if (mode == "read" && !params.has("query")) @@ -136,7 +137,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne std::string connection_string = params.get("connection_string"); LOG_TRACE(log, "Connection string: '{}'", connection_string); - WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); try { @@ -163,9 +164,8 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne #endif auto pool = getPool(connection_string); - ReadBufferFromIStream read_buf(request.stream()); - auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block, - context, max_block_size); + auto & read_buf = request.getStream(); + auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block, context, max_block_size); auto input_stream = std::make_shared(input_format); ODBCBlockOutputStream output_stream(pool->get(), db_name, table_name, *sample_block, quoting_style); copyData(*input_stream, output_stream); @@ -187,9 +187,27 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne auto message = 
getCurrentExceptionMessage(true); response.setStatusAndReason( Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); // can't call process_error, because of too soon response sending - writeStringBinary(message, out); - tryLogCurrentException(log); + try + { + writeStringBinary(message, out); + out.finalize(); + } + catch (...) + { + tryLogCurrentException(log); + } + + tryLogCurrentException(log); + } + + try + { + out.finalize(); + } + catch (...) + { + tryLogCurrentException(log); } } diff --git a/programs/odbc-bridge/MainHandler.h b/programs/odbc-bridge/MainHandler.h index ec5e6693a60..e237ede5814 100644 --- a/programs/odbc-bridge/MainHandler.h +++ b/programs/odbc-bridge/MainHandler.h @@ -1,12 +1,13 @@ #pragma once #include +#include + #include -#include #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" - #include +#include #pragma GCC diagnostic pop namespace DB @@ -16,7 +17,7 @@ namespace DB * and also query in request body * response in RowBinary format */ -class ODBCHandler : public Poco::Net::HTTPRequestHandler +class ODBCHandler : public HTTPRequestHandler { public: using PoolPtr = std::shared_ptr; @@ -34,7 +35,7 @@ public: { } - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: Poco::Logger * log; @@ -47,7 +48,7 @@ private: static inline std::mutex mutex; PoolPtr getPool(const std::string & connection_str); - void processError(Poco::Net::HTTPServerResponse & response, const std::string & message); + void processError(HTTPServerResponse & response, const std::string & message); }; } diff --git a/programs/odbc-bridge/ODBCBridge.cpp b/programs/odbc-bridge/ODBCBridge.cpp index 9deefaf7895..8869a2639c1 100644 --- a/programs/odbc-bridge/ODBCBridge.cpp +++ b/programs/odbc-bridge/ODBCBridge.cpp @@ -11,7 +11,6 @@ # include #endif -#include #include #include #include @@ -23,6 +22,7 @@ #include #include #include +#include namespace DB @@ -212,8 +212,12 @@ int ODBCBridge::main(const std::vector & /*args*/) SensitiveDataMasker::setInstance(std::make_unique(config(), "query_masking_rules")); } - auto server = Poco::Net::HTTPServer( - new HandlerFactory("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context), server_pool, socket, http_params); + auto server = HTTPServer( + context, + std::make_shared("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context), + server_pool, + socket, + http_params); server.start(); LOG_INFO(log, "Listening http://{}", address.toString()); diff --git a/programs/odbc-bridge/PingHandler.cpp b/programs/odbc-bridge/PingHandler.cpp index b0313e46bf3..e3ab5e5cd00 100644 --- a/programs/odbc-bridge/PingHandler.cpp +++ b/programs/odbc-bridge/PingHandler.cpp @@ -6,7 +6,7 @@ namespace DB { -void PingHandler::handleRequest(Poco::Net::HTTPServerRequest & /*request*/, Poco::Net::HTTPServerResponse & response) +void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response) { try { diff --git a/programs/odbc-bridge/PingHandler.h b/programs/odbc-bridge/PingHandler.h index d8109a50bb6..c969ec55af7 100644 --- a/programs/odbc-bridge/PingHandler.h +++ b/programs/odbc-bridge/PingHandler.h @@ -1,17 +1,19 @@ #pragma once -#include + +#include namespace DB { -/** Simple ping handler, answers "Ok." to GET request - */ -class PingHandler : public Poco::Net::HTTPRequestHandler + +/// Simple ping handler, answers "Ok." 
to GET request +class PingHandler : public HTTPRequestHandler { public: - PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {} - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + explicit PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {} + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: size_t keep_alive_timeout; }; + } diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp index fa08a27da59..d4a70db61f4 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.cpp +++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp @@ -2,12 +2,12 @@ #if USE_ODBC -# include +# include +# include # include # include # include # include -# include # include # include # include @@ -33,16 +33,16 @@ namespace } -void SchemaAllowedHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) +void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) { - Poco::Net::HTMLForm params(request, request.stream()); + HTMLForm params(request, request.getStream()); LOG_TRACE(log, "Request URI: {}", request.getURI()); auto process_error = [&response, this](const std::string & message) { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); if (!response.sent()) - response.send() << message << std::endl; + *response.send() << message << std::endl; LOG_WARNING(log, message); }; @@ -60,8 +60,16 @@ void SchemaAllowedHandler::handleRequest(Poco::Net::HTTPServerRequest & request, bool result = isSchemaAllowed(hdbc); - WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout); - writeBoolText(result, out); + WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout); + try + { + writeBoolText(result, out); + out.finalize(); + } + catch (...) + { + out.finalize(); + } } catch (...) { diff --git a/programs/odbc-bridge/SchemaAllowedHandler.h b/programs/odbc-bridge/SchemaAllowedHandler.h index 76aa23b903c..91eddf67803 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.h +++ b/programs/odbc-bridge/SchemaAllowedHandler.h @@ -1,17 +1,18 @@ #pragma once +#include + #include -#include #if USE_ODBC namespace DB { + class Context; - -/// This handler establishes connection to database, and retrieve whether schema is allowed. -class SchemaAllowedHandler : public Poco::Net::HTTPRequestHandler +/// This handler establishes connection to database, and retrieves whether schema is allowed. 
+class SchemaAllowedHandler : public HTTPRequestHandler { public: SchemaAllowedHandler(size_t keep_alive_timeout_, Context &) @@ -19,7 +20,7 @@ public: { } - void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override; + void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override; private: Poco::Logger * log; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 2bb5181d348..f501e182cb7 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -59,7 +59,6 @@ #include #include #include -#include #include "MetricsTransmitter.h" #include #include @@ -70,6 +69,7 @@ #include #include #include +#include #if !defined(ARCADIA_BUILD) @@ -94,12 +94,16 @@ # include #endif +#if USE_NURAFT +# include +#endif namespace CurrentMetrics { extern const Metric Revision; extern const Metric VersionInteger; extern const Metric MemoryTracking; + extern const Metric MaxDDLEntryID; } @@ -842,23 +846,33 @@ int Server::main(const std::vector & /*args*/) listen_try = true; } - for (const auto & listen_host : listen_hosts) + if (config().has("test_keeper_server")) { - /// TCP TestKeeper - const char * port_name = "test_keeper_server.tcp_port"; - createServer(listen_host, port_name, listen_try, [&](UInt16 port) +#if USE_NURAFT + /// Initialize test keeper RAFT. Do nothing if no nu_keeper_server in config. + global_context->initializeNuKeeperStorageDispatcher(); + for (const auto & listen_host : listen_hosts) { - Poco::Net::ServerSocket socket; - auto address = socketBindListen(socket, listen_host, port); - socket.setReceiveTimeout(settings.receive_timeout); - socket.setSendTimeout(settings.send_timeout); - servers_to_start_before_tables->emplace_back( - port_name, - std::make_unique( - new TestKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + /// TCP NuKeeper + const char * port_name = "test_keeper_server.tcp_port"; + createServer(listen_host, port_name, listen_try, [&](UInt16 port) + { + Poco::Net::ServerSocket socket; + auto address = socketBindListen(socket, listen_host, port); + socket.setReceiveTimeout(settings.receive_timeout); + socket.setSendTimeout(settings.send_timeout); + servers_to_start_before_tables->emplace_back( + port_name, + std::make_unique( + new NuKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); + + LOG_INFO(log, "Listening for connections to NuKeeper (tcp): {}", address.toString()); + }); + } +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination."); +#endif - LOG_INFO(log, "Listening for connections to fake zookeeper (tcp): {}", address.toString()); - }); } for (auto & server : *servers_to_start_before_tables) @@ -898,6 +912,8 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); else LOG_INFO(log, "Closed connections to servers for tables."); + + global_context->shutdownNuKeeperStorageDispatcher(); } /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available. 
@@ -997,7 +1013,8 @@ int Server::main(const std::vector & /*args*/) int pool_size = config().getInt("distributed_ddl.pool_size", 1); if (pool_size < 1) throw Exception("distributed_ddl.pool_size should be greater then 0", ErrorCodes::ARGUMENT_OUT_OF_BOUND); - global_context->setDDLWorker(std::make_unique(pool_size, ddl_zookeeper_path, *global_context, &config(), "distributed_ddl")); + global_context->setDDLWorker(std::make_unique(pool_size, ddl_zookeeper_path, *global_context, &config(), + "distributed_ddl", "DDLWorker", &CurrentMetrics::MaxDDLEntryID)); } std::unique_ptr dns_cache_updater; @@ -1056,8 +1073,10 @@ int Server::main(const std::vector & /*args*/) socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); LOG_INFO(log, "Listening for http://{}", address.toString()); }); @@ -1071,8 +1090,10 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), createHandlerFactory(*this, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); LOG_INFO(log, "Listening for https://{}", address.toString()); #else @@ -1146,8 +1167,14 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), + createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"), + server_pool, + socket, + http_params)); LOG_INFO(log, "Listening for replica communication (interserver): http://{}", address.toString()); }); @@ -1160,8 +1187,14 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), + createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"), + server_pool, + socket, + http_params)); LOG_INFO(log, "Listening for secure replica communication (interserver): https://{}", address.toString()); #else @@ -1221,8 +1254,14 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - 
servers->emplace_back(port_name, std::make_unique( - createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); + servers->emplace_back( + port_name, + std::make_unique( + context(), + createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"), + server_pool, + socket, + http_params)); LOG_INFO(log, "Listening for Prometheus: http://{}", address.toString()); }); diff --git a/programs/server/Server.h b/programs/server/Server.h index c582e475308..fbfc26f6ee5 100644 --- a/programs/server/Server.h +++ b/programs/server/Server.h @@ -51,6 +51,7 @@ public: } void defineOptions(Poco::Util::OptionSet & _options) override; + protected: int run() override; @@ -65,8 +66,6 @@ protected: private: Context * global_context_ptr = nullptr; -private: - Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const; using CreateServerFunc = std::function; diff --git a/programs/server/config.xml b/programs/server/config.xml index 849d3dc32ba..ba9b8b04b05 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -284,6 +284,10 @@ In bytes. Cache is single for server. Memory is allocated only on demand. Cache is used when 'use_uncompressed_cache' user setting turned on (off by default). Uncompressed cache is advantageous only for very short queries and in rare cases. + + Note: uncompressed cache can be pointless for lz4, because memory bandwidth + is slower than multi-core decompression on some server configurations. + Enabling it can sometimes paradoxically make queries slower. --> 8589934592 @@ -421,9 +425,15 @@ - + + + + default diff --git a/programs/server/users.xml b/programs/server/users.xml index 3223d855651..ef66891a6a0 100644 --- a/programs/server/users.xml +++ b/programs/server/users.xml @@ -7,9 +7,6 @@ 10000000000 - - 0 - + 4 diff --git a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml index 227d87ca92a..ed7f66b1b41 100644 --- a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml +++ b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_y.xml @@ -11,7 +11,8 @@ elements
- 5 + + 4 diff --git a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml index 8eff3a6407b..d2d7dff61ad 100644 --- a/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml +++ b/tests/integration/test_dictionaries_dependency_xml/configs/dictionaries/dep_z.xml @@ -12,7 +12,8 @@ SELECT intDiv(count(), 5) from dict.dep_y - 5 + + 4 diff --git a/tests/integration/test_dictionaries_dependency_xml/test.py b/tests/integration/test_dictionaries_dependency_xml/test.py index d5453bb4814..b8ebcc6cc4b 100644 --- a/tests/integration/test_dictionaries_dependency_xml/test.py +++ b/tests/integration/test_dictionaries_dependency_xml/test.py @@ -65,7 +65,7 @@ def test_get_data(started_cluster): assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "fire\n" assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "ZZ\n" - # dep_x and dep_z are updated only when there `intDiv(count(), 4)` is changed. + # dep_x and dep_z are updated only when there `intDiv(count(), 5)` is changed. query("INSERT INTO test.elements VALUES (4, 'ether', 404, 0.001)") assert_eq_with_retry(instance, "SELECT dictHas('dep_x', toUInt64(4))", "1", sleep_time=2, retry_count=10) assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "fire\n" diff --git a/tests/integration/test_distributed_ddl/cluster.py b/tests/integration/test_distributed_ddl/cluster.py index 811eb94bad4..24f11fec547 100644 --- a/tests/integration/test_distributed_ddl/cluster.py +++ b/tests/integration/test_distributed_ddl/cluster.py @@ -10,8 +10,8 @@ from helpers.test_tools import TSV class ClickHouseClusterWithDDLHelpers(ClickHouseCluster): - def __init__(self, base_path, config_dir): - ClickHouseCluster.__init__(self, base_path) + def __init__(self, base_path, config_dir, testcase_name): + ClickHouseCluster.__init__(self, base_path, name=testcase_name) self.test_config_dir = config_dir @@ -104,8 +104,8 @@ class ClickHouseClusterWithDDLHelpers(ClickHouseCluster): def ddl_check_there_are_no_dublicates(instance): query = "SELECT max(c), argMax(q, c) FROM (SELECT lower(query) AS q, count() AS c FROM system.query_log WHERE type=2 AND q LIKE '/* ddl_entry=query-%' GROUP BY query)" rows = instance.query(query) - assert len(rows) > 0 and rows[0][0] == "1", "dublicates on {} {}, query {}".format(instance.name, - instance.ip_address, query) + assert len(rows) > 0 and rows[0][0] == "1", "dublicates on {} {}: {}".format(instance.name, + instance.ip_address, rows) @staticmethod def insert_reliable(instance, query_insert): diff --git a/tests/integration/test_distributed_ddl/test.py b/tests/integration/test_distributed_ddl/test.py index f0e78dfec41..58e1d0d06f7 100755 --- a/tests/integration/test_distributed_ddl/test.py +++ b/tests/integration/test_distributed_ddl/test.py @@ -14,7 +14,7 @@ from .cluster import ClickHouseClusterWithDDLHelpers @pytest.fixture(scope="module", params=["configs", "configs_secure"]) def test_cluster(request): - cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param) + cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param, request.param) try: cluster.prepare() diff --git a/tests/integration/test_distributed_ddl/test_replicated_alter.py b/tests/integration/test_distributed_ddl/test_replicated_alter.py index bd95f5660b7..148ad5fca5e 100644 --- a/tests/integration/test_distributed_ddl/test_replicated_alter.py +++ 
b/tests/integration/test_distributed_ddl/test_replicated_alter.py @@ -12,7 +12,7 @@ from .cluster import ClickHouseClusterWithDDLHelpers @pytest.fixture(scope="module", params=["configs", "configs_secure"]) def test_cluster(request): - cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param) + cluster = ClickHouseClusterWithDDLHelpers(__file__, request.param, "alters_" + request.param) try: # TODO: Fix ON CLUSTER alters when nodes have different configs. Need to canonicalize node identity. diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.reference b/tests/integration/test_insert_distributed_async_extra_dirs/__init__.py similarity index 100% rename from tests/queries/0_stateless/01641_memory_tracking_insert_optimize.reference rename to tests/integration/test_insert_distributed_async_extra_dirs/__init__.py diff --git a/tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml b/tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml new file mode 100644 index 00000000000..1df72377ce6 --- /dev/null +++ b/tests/integration/test_insert_distributed_async_extra_dirs/configs/remote_servers.xml @@ -0,0 +1,13 @@ + + + + + + node + 9000 + + + + + + diff --git a/tests/integration/test_insert_distributed_async_extra_dirs/test.py b/tests/integration/test_insert_distributed_async_extra_dirs/test.py new file mode 100644 index 00000000000..8365fce298d --- /dev/null +++ b/tests/integration/test_insert_distributed_async_extra_dirs/test.py @@ -0,0 +1,43 @@ +# pylint: disable=unused-argument +# pylint: disable=redefined-outer-name +# pylint: disable=line-too-long + +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance('node', main_configs=['configs/remote_servers.xml'], stay_alive=True) + +@pytest.fixture(scope='module', autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def test_insert_distributed_async_send_success(): + node.query('CREATE TABLE data (key Int, value String) Engine=Null()') + node.query(""" + CREATE TABLE dist AS data + Engine=Distributed( + test_cluster, + currentDatabase(), + data, + key + ) + """) + + node.exec_in_container(['bash', '-c', 'mkdir /var/lib/clickhouse/data/default/dist/shard10000_replica10000']) + node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/data/default/dist/shard10000_replica10000/1.bin']) + + node.exec_in_container(['bash', '-c', 'mkdir /var/lib/clickhouse/data/default/dist/shard1_replica10000']) + node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/data/default/dist/shard1_replica10000/1.bin']) + + node.exec_in_container(['bash', '-c', 'mkdir /var/lib/clickhouse/data/default/dist/shard10000_replica1']) + node.exec_in_container(['bash', '-c', 'touch /var/lib/clickhouse/data/default/dist/shard10000_replica1/1.bin']) + + # will check that clickhouse-server is alive + node.restart_clickhouse() diff --git a/tests/integration/test_insert_distributed_async_send/test.py b/tests/integration/test_insert_distributed_async_send/test.py index 7f6a2887c3b..b469da4e2e1 100644 --- a/tests/integration/test_insert_distributed_async_send/test.py +++ b/tests/integration/test_insert_distributed_async_send/test.py @@ -175,38 +175,43 @@ def test_insert_distributed_async_send_different_header(batch): create_tables('insert_distributed_async_send_cluster_two_shards') node = get_node(batch) - node.query("INSERT INTO dist VALUES (0, 
'')", settings={ + node.query("INSERT INTO dist VALUES (0, 'f')", settings={ 'prefer_localhost_replica': 0, }) - node.query('ALTER TABLE dist MODIFY COLUMN value Nullable(String)') - node.query("INSERT INTO dist VALUES (2, '')", settings={ + node.query('ALTER TABLE dist MODIFY COLUMN value UInt64') + node.query("INSERT INTO dist VALUES (2, 1)", settings={ 'prefer_localhost_replica': 0, }) + n1.query('ALTER TABLE data MODIFY COLUMN value UInt64', settings={ + 'mutations_sync': 1, + }) + if batch: - # first batch with Nullable(String) - n1.query('ALTER TABLE data MODIFY COLUMN value Nullable(String)', settings={ - 'mutations_sync': 1, - }) - # but only one batch will be sent - with pytest.raises(QueryRuntimeException, match=r"DB::Exception: Cannot convert: String to Nullable\(String\)\. Stack trace:"): + # but only one batch will be sent, and first is with UInt64 column, so + # one rows inserted, and for string ('f') exception will be throw. + with pytest.raises(QueryRuntimeException, match=r"DB::Exception: Cannot parse string 'f' as UInt64: syntax error at begin of string"): node.query('SYSTEM FLUSH DISTRIBUTED dist') assert int(n1.query('SELECT count() FROM data')) == 1 - # second batch with String - n1.query('ALTER TABLE data MODIFY COLUMN value String', settings={ - 'mutations_sync': 1, - }) + # but once underlying column String, implicit conversion will do the + # thing, and insert left batch. + n1.query(""" + DROP TABLE data SYNC; + CREATE TABLE data (key Int, value String) Engine=MergeTree() ORDER BY key; + """) node.query('SYSTEM FLUSH DISTRIBUTED dist') - assert int(n1.query('SELECT count() FROM data')) == 2 - else: - # first send with String - with pytest.raises(QueryRuntimeException, match=r"DB::Exception: Cannot convert: Nullable\(String\) to String\. Stack trace:"): - node.query('SYSTEM FLUSH DISTRIBUTED dist') assert int(n1.query('SELECT count() FROM data')) == 1 - # second send with Nullable(String) - n1.query('ALTER TABLE data MODIFY COLUMN value Nullable(String)', settings={ - 'mutations_sync': 1, - }) + else: + # first send with String ('f'), so zero rows will be inserted + with pytest.raises(QueryRuntimeException, match=r"DB::Exception: Cannot parse string 'f' as UInt64: syntax error at begin of string"): + node.query('SYSTEM FLUSH DISTRIBUTED dist') + assert int(n1.query('SELECT count() FROM data')) == 0 + # but once underlying column String, implicit conversion will do the + # thing, and insert 2 rows (mixed UInt64 and String). 
+ n1.query(""" + DROP TABLE data SYNC; + CREATE TABLE data (key Int, value String) Engine=MergeTree() ORDER BY key; + """) node.query('SYSTEM FLUSH DISTRIBUTED dist') assert int(n1.query('SELECT count() FROM data')) == 2 diff --git a/tests/integration/test_limited_replicated_fetches/test.py b/tests/integration/test_limited_replicated_fetches/test.py index 2091c65857e..9b9b8befd67 100644 --- a/tests/integration/test_limited_replicated_fetches/test.py +++ b/tests/integration/test_limited_replicated_fetches/test.py @@ -69,3 +69,6 @@ def test_limited_fetches(started_cluster): assert max([len(parts) for parts in fetches_result]) == 3, "Strange, but we don't utilize max concurrent threads for fetches" assert(max(background_fetches_metric)) == 3, "Just checking metric consistent with table" + + node1.query("DROP TABLE IF EXISTS t SYNC") + node2.query("DROP TABLE IF EXISTS t SYNC") \ No newline at end of file diff --git a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py index 8bb1fdb84e0..c9be2387fc7 100644 --- a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py @@ -10,6 +10,7 @@ import random import threading from multiprocessing.dummy import Pool +from helpers.test_tools import assert_eq_with_retry def check_query(clickhouse_node, query, result_set, retry_count=60, interval_seconds=3): lastest_result = '' @@ -79,9 +80,9 @@ def dml_with_materialize_mysql_database(clickhouse_node, mysql_node, service_nam check_query(clickhouse_node, """ SELECT key, unsigned_tiny_int, tiny_int, unsigned_small_int, - small_int, unsigned_medium_int, medium_int, unsigned_int, _int, unsigned_integer, _integer, + small_int, unsigned_medium_int, medium_int, unsigned_int, _int, unsigned_integer, _integer, unsigned_bigint, _bigint, unsigned_float, _float, unsigned_double, _double, _varchar, _char, binary_col, - _date, _datetime, /* exclude it, because ON UPDATE CURRENT_TIMESTAMP _timestamp, */ + _date, _datetime, /* exclude it, because ON UPDATE CURRENT_TIMESTAMP _timestamp, */ _bool FROM test_database.test_table_1 ORDER BY key FORMAT TSV """, "1\t2\t-1\t2\t-2\t3\t-3\t4\t-4\t5\t-5\t6\t-6\t3.2\t-3.2\t3.4\t-3.4\tvarchar\tchar\tbinary\\0\\0\t2020-01-01\t" @@ -485,7 +486,7 @@ def select_without_columns(clickhouse_node, mysql_node, service_name): check_query(clickhouse_node, "SELECT count((_sign, _version)) FROM db.t FORMAT TSV", res[0]) assert clickhouse_node.query("SELECT count(_sign) FROM db.t FORMAT TSV") == res[1] - assert clickhouse_node.query("SELECT count(_version) FROM db.t FORMAT TSV") == res[2] + assert_eq_with_retry(clickhouse_node, "SELECT count(_version) FROM db.t", res[2].strip(), sleep_time=2, retry_count=3) assert clickhouse_node.query("SELECT count() FROM db.t FORMAT TSV") == "1\n" assert clickhouse_node.query("SELECT count(*) FROM db.t FORMAT TSV") == "1\n" @@ -720,7 +721,7 @@ def clickhouse_killed_while_insert(clickhouse_node, mysql_node, service_name): t = threading.Thread(target=insert, args=(1000,)) t.start() - + # TODO: add clickhouse_node.restart_clickhouse(20, kill=False) test clickhouse_node.restart_clickhouse(20, kill=True) t.join() @@ -732,3 +733,50 @@ def clickhouse_killed_while_insert(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE kill_clickhouse_while_insert") clickhouse_node.query("DROP DATABASE kill_clickhouse_while_insert") + +def utf8mb4_test(clickhouse_node, 
mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS utf8mb4_test") + clickhouse_node.query("DROP DATABASE IF EXISTS utf8mb4_test") + mysql_node.query("CREATE DATABASE utf8mb4_test") + mysql_node.query("CREATE TABLE utf8mb4_test.test (id INT(11) NOT NULL PRIMARY KEY, name VARCHAR(255)) ENGINE=InnoDB DEFAULT CHARACTER SET utf8mb4") + mysql_node.query("INSERT INTO utf8mb4_test.test VALUES(1, '🦄'),(2, '\u2601')") + clickhouse_node.query("CREATE DATABASE utf8mb4_test ENGINE = MaterializeMySQL('{}:3306', 'utf8mb4_test', 'root', 'clickhouse')".format(service_name)) + check_query(clickhouse_node, "SELECT id, name FROM utf8mb4_test.test ORDER BY id", "1\t\U0001F984\n2\t\u2601\n") + +def system_parts_test(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS system_parts_test") + clickhouse_node.query("DROP DATABASE IF EXISTS system_parts_test") + mysql_node.query("CREATE DATABASE system_parts_test") + mysql_node.query("CREATE TABLE system_parts_test.test ( `id` int(11) NOT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB;") + mysql_node.query("INSERT INTO system_parts_test.test VALUES(1),(2),(3)") + def check_active_parts(num): + check_query(clickhouse_node, "SELECT count() FROM system.parts WHERE database = 'system_parts_test' AND table = 'test' AND active = 1", "{}\n".format(num)) + clickhouse_node.query("CREATE DATABASE system_parts_test ENGINE = MaterializeMySQL('{}:3306', 'system_parts_test', 'root', 'clickhouse')".format(service_name)) + check_active_parts(1) + mysql_node.query("INSERT INTO system_parts_test.test VALUES(4),(5),(6)") + check_active_parts(2) + clickhouse_node.query("OPTIMIZE TABLE system_parts_test.test") + check_active_parts(1) + +def multi_table_update_test(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS multi_table_update") + clickhouse_node.query("DROP DATABASE IF EXISTS multi_table_update") + mysql_node.query("CREATE DATABASE multi_table_update") + mysql_node.query("CREATE TABLE multi_table_update.a (id INT(11) NOT NULL PRIMARY KEY, value VARCHAR(255))") + mysql_node.query("CREATE TABLE multi_table_update.b (id INT(11) NOT NULL PRIMARY KEY, othervalue VARCHAR(255))") + mysql_node.query("INSERT INTO multi_table_update.a VALUES(1, 'foo')") + mysql_node.query("INSERT INTO multi_table_update.b VALUES(1, 'bar')") + clickhouse_node.query("CREATE DATABASE multi_table_update ENGINE = MaterializeMySQL('{}:3306', 'multi_table_update', 'root', 'clickhouse')".format(service_name)) + check_query(clickhouse_node, "SHOW TABLES FROM multi_table_update", "a\nb\n") + mysql_node.query("UPDATE multi_table_update.a, multi_table_update.b SET value='baz', othervalue='quux' where a.id=b.id") + + check_query(clickhouse_node, "SELECT * FROM multi_table_update.a", "1\tbaz\n") + check_query(clickhouse_node, "SELECT * FROM multi_table_update.b", "1\tquux\n") + +def system_tables_test(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS system_tables_test") + clickhouse_node.query("DROP DATABASE IF EXISTS system_tables_test") + mysql_node.query("CREATE DATABASE system_tables_test") + mysql_node.query("CREATE TABLE system_tables_test.test (id int NOT NULL PRIMARY KEY) ENGINE=InnoDB") + clickhouse_node.query("CREATE DATABASE system_tables_test ENGINE = MaterializeMySQL('{}:3306', 'system_tables_test', 'root', 'clickhouse')".format(service_name)) + check_query(clickhouse_node, "SELECT partition_key, sorting_key, primary_key FROM system.tables WHERE database = 'system_tables_test' AND 
name = 'test'", "intDiv(id, 4294967)\tid\tid\n") diff --git a/tests/integration/test_materialize_mysql_database/test.py b/tests/integration/test_materialize_mysql_database/test.py index dbd6e894987..ced9a978d02 100644 --- a/tests/integration/test_materialize_mysql_database/test.py +++ b/tests/integration/test_materialize_mysql_database/test.py @@ -37,6 +37,12 @@ class MySQLNodeInstance: self.docker_compose = docker_compose self.project_name = project_name + self.base_dir = p.dirname(__file__) + self.instances_dir = p.join(self.base_dir, '_instances_mysql') + if not os.path.exists(self.instances_dir): + os.mkdir(self.instances_dir) + self.docker_logs_path = p.join(self.instances_dir, 'docker_mysql.log') + def alloc_connection(self): if self.mysql_connection is None: @@ -71,10 +77,28 @@ class MySQLNodeInstance: cursor.execute(executio_query) return cursor.fetchall() + def start_and_wait(self): + run_and_check(['docker-compose', + '-p', cluster.project_name, + '-f', self.docker_compose, + 'up', '--no-recreate', '-d', + ]) + self.wait_mysql_to_start(120) + def close(self): if self.mysql_connection is not None: self.mysql_connection.close() + with open(self.docker_logs_path, "w+") as f: + try: + run_and_check([ + 'docker-compose', + '-p', cluster.project_name, + '-f', self.docker_compose, 'logs', + ], stdout=f) + except Exception as e: + print("Unable to get logs from docker mysql.") + def wait_mysql_to_start(self, timeout=60): start = time.time() while time.time() - start < timeout: @@ -95,9 +119,7 @@ def started_mysql_5_7(): mysql_node = MySQLNodeInstance('root', 'clickhouse', '127.0.0.1', 3308, docker_compose) try: - run_and_check( - ['docker-compose', '-p', cluster.project_name, '-f', docker_compose, 'up', '--no-recreate', '-d']) - mysql_node.wait_mysql_to_start(120) + mysql_node.start_and_wait() yield mysql_node finally: mysql_node.close() @@ -111,9 +133,7 @@ def started_mysql_8_0(): mysql_node = MySQLNodeInstance('root', 'clickhouse', '127.0.0.1', 33308, docker_compose) try: - run_and_check( - ['docker-compose', '-p', cluster.project_name, '-f', docker_compose, 'up', '--no-recreate', '-d']) - mysql_node.wait_mysql_to_start(120) + mysql_node.start_and_wait() yield mysql_node finally: mysql_node.close() @@ -228,3 +248,21 @@ def test_clickhouse_killed_while_insert_5_7(started_cluster, started_mysql_5_7, @pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_atomic]) def test_clickhouse_killed_while_insert_8_0(started_cluster, started_mysql_8_0, clickhouse_node): materialize_with_ddl.clickhouse_killed_while_insert(clickhouse_node, started_mysql_8_0, "mysql8_0") + +@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary]) +def test_utf8mb4(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): + materialize_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_5_7, "mysql1") + materialize_with_ddl.utf8mb4_test(clickhouse_node, started_mysql_8_0, "mysql8_0") + +@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary]) +def test_system_parts_table(started_cluster, started_mysql_8_0, clickhouse_node): + materialize_with_ddl.system_parts_test(clickhouse_node, started_mysql_8_0, "mysql8_0") + +@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary]) +def test_multi_table_update(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): + materialize_with_ddl.multi_table_update_test(clickhouse_node, started_mysql_5_7, "mysql1") + 
materialize_with_ddl.multi_table_update_test(clickhouse_node, started_mysql_8_0, "mysql8_0") + +@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_ordinary]) +def test_system_tables_table(started_cluster, started_mysql_8_0, clickhouse_node): + materialize_with_ddl.system_tables_test(clickhouse_node, started_mysql_8_0, "mysql8_0") diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/bg_processing_pool_conf.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/bg_processing_pool_conf.xml new file mode 100644 index 00000000000..a756c4434ea --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/bg_processing_pool_conf.xml @@ -0,0 +1,5 @@ + + 0.5 + 0.5 + 0.5 + diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/log_conf.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/log_conf.xml new file mode 100644 index 00000000000..318a6bca95d --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/log_conf.xml @@ -0,0 +1,12 @@ + + 3 + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf.xml new file mode 100644 index 00000000000..9361a21efca --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf.xml @@ -0,0 +1,34 @@ + + + + + s3 + http://minio1:9001/root/data/ + minio + minio123 + true + 1 + + + local + / + + + + + +
+ s3 +
+ + hdd + +
+
+
+
+ + + 0 + +
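The storage configuration above declares an S3 disk backed by the MinIO endpoint plus a local hdd disk, and the restore tests added later in this diff drive recovery through a plain-text marker file on that disk. The sketch below only illustrates the format that the test helper create_restore_file writes with echo (an optional revision, then bucket and path, one value per line); the /tmp path is made up for the example and revision 0 is simply what the full-restore test passes.

import os

# Minimal sketch of the marker file create_restore_file() builds inside the container
# (the real test appends to /var/lib/clickhouse/disks/s3/restore with `echo -en`).
def write_restore_marker(disk_root, revision=0, bucket=None, path=None):
    os.makedirs(disk_root, exist_ok=True)
    with open(os.path.join(disk_root, 'restore'), 'w') as f:
        f.write('{}\n'.format(revision))      # revision 0 in the full-restore case
        if bucket:
            f.write('{}\n'.format(bucket))    # source bucket, e.g. "root" or "root2"
        if path:
            f.write('{}\n'.format(path))      # path prefix inside the bucket, e.g. "data"

write_restore_marker('/tmp/disks-s3-example', revision=0, bucket='root', path='data')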
diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml new file mode 100644 index 00000000000..645d1111ab8 --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket.xml @@ -0,0 +1,34 @@ + + + + + s3 + http://minio1:9001/root2/data/ + minio + minio123 + true + 1 + + + local + / + + + + + +
+ s3 +
+ + hdd + +
+
+
+
+ + + 0 + +
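storage_conf_another_bucket.xml is the same disk layout pointed at the root2 bucket, so node_another_bucket can be restored from data originally written by node. The restore assertions in the new test expect count() == 4 * 4096 and sum(id) == 0 because generate_values() is always called in sign=1 / sign=-1 pairs; that invariant is easy to check standalone with the same id scheme.

# generate_values(date, count, sign) in the new test produces ids equal to sign * (i + 1),
# so inserting a batch with sign=1 and a batch with sign=-1 cancels out in sum(id).
def ids(count, sign=1):
    return [sign * (i + 1) for i in range(count)]

batches = [ids(4096), ids(4096, -1), ids(4096), ids(4096, -1)]
all_ids = [value for batch in batches for value in batch]

assert len(all_ids) == 4096 * 4   # matches the expected count(*)
assert sum(all_ids) == 0          # matches the expected sum(id)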
diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket_path.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket_path.xml new file mode 100644 index 00000000000..42207674c79 --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/storage_conf_another_bucket_path.xml @@ -0,0 +1,34 @@ + + + + + s3 + http://minio1:9001/root2/another_data/ + minio + minio123 + true + 1 + + + local + / + + + + + +
+ s3 +
+ + hdd + +
+
+
+
+ + + 0 + +
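The third variant, storage_conf_another_bucket_path.xml, additionally restores from a different key prefix (another_data) in the second bucket. The revision-based tests below obtain a counter with get_revision_counter(), which just reads shadow/<backup_number>/revision.txt after ALTER TABLE ... FREEZE, and the "middle of mutation" case picks the midpoint between two snapshots; with made-up counters that choice is plain integer arithmetic.

# Hypothetical revision counters; the real values come from
# /var/lib/clickhouse/disks/s3/shadow/<backup_number>/revision.txt via get_revision_counter().
revision_before_mutation = 10
revision_after_mutation = 16

# test_restore_mutations restores to the midpoint to exercise recovery of a
# half-applied mutation, which should then be finished after table startup.
revision = (revision_before_mutation + revision_after_mutation) // 2
assert revision_before_mutation <= revision <= revision_after_mutation
print(revision)  # 13 for these example counters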
diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.d/users.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.d/users.xml new file mode 100644 index 00000000000..797113053f4 --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.d/users.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/tests/integration/test_merge_tree_s3_restore/configs/config.xml b/tests/integration/test_merge_tree_s3_restore/configs/config.xml new file mode 100644 index 00000000000..24b7344df3a --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/configs/config.xml @@ -0,0 +1,20 @@ + + + 9000 + 127.0.0.1 + + + + true + none + + AcceptCertificateHandler + + + + + 500 + 5368709120 + ./clickhouse/ + users.xml + diff --git a/tests/integration/test_merge_tree_s3_restore/test.py b/tests/integration/test_merge_tree_s3_restore/test.py new file mode 100644 index 00000000000..346d9aced3f --- /dev/null +++ b/tests/integration/test_merge_tree_s3_restore/test.py @@ -0,0 +1,313 @@ +import logging +import random +import string +import time + +import pytest +from helpers.cluster import ClickHouseCluster + +logging.getLogger().setLevel(logging.INFO) +logging.getLogger().addHandler(logging.StreamHandler()) + + +@pytest.fixture(scope="module") +def cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance("node", main_configs=[ + "configs/config.d/storage_conf.xml", + "configs/config.d/bg_processing_pool_conf.xml", + "configs/config.d/log_conf.xml"], user_configs=[], with_minio=True, stay_alive=True) + cluster.add_instance("node_another_bucket", main_configs=[ + "configs/config.d/storage_conf_another_bucket.xml", + "configs/config.d/bg_processing_pool_conf.xml", + "configs/config.d/log_conf.xml"], user_configs=[], stay_alive=True) + cluster.add_instance("node_another_bucket_path", main_configs=[ + "configs/config.d/storage_conf_another_bucket_path.xml", + "configs/config.d/bg_processing_pool_conf.xml", + "configs/config.d/log_conf.xml"], user_configs=[], stay_alive=True) + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def random_string(length): + letters = string.ascii_letters + return ''.join(random.choice(letters) for i in range(length)) + + +def generate_values(date_str, count, sign=1): + data = [[date_str, sign * (i + 1), random_string(10)] for i in range(count)] + data.sort(key=lambda tup: tup[1]) + return ",".join(["('{}',{},'{}',{})".format(x, y, z, 0) for x, y, z in data]) + + +def create_table(node, table_name, additional_settings=None): + node.query("CREATE DATABASE IF NOT EXISTS s3 ENGINE = Ordinary") + + create_table_statement = """ + CREATE TABLE s3.{} ( + dt Date, + id Int64, + data String, + counter Int64, + INDEX min_max (id) TYPE minmax GRANULARITY 3 + ) ENGINE=MergeTree() + PARTITION BY dt + ORDER BY (dt, id) + SETTINGS + storage_policy='s3', + old_parts_lifetime=600, + index_granularity=512 + """.format(table_name) + + if additional_settings: + create_table_statement += "," + create_table_statement += additional_settings + + node.query(create_table_statement) + + +def purge_s3(cluster, bucket): + minio = cluster.minio_client + for obj in list(minio.list_objects(bucket, recursive=True)): + minio.remove_object(bucket, obj.object_name) + + +def drop_s3_metadata(node): + node.exec_in_container(['bash', '-c', 'rm -rf /var/lib/clickhouse/disks/s3/*'], user='root') + + +def drop_shadow_information(node): + node.exec_in_container(['bash', '-c', 
'rm -rf /var/lib/clickhouse/shadow/*'], user='root') + + +def create_restore_file(node, revision=0, bucket=None, path=None): + add_restore_option = 'echo -en "{}\n" >> /var/lib/clickhouse/disks/s3/restore' + node.exec_in_container(['bash', '-c', add_restore_option.format(revision)], user='root') + if bucket: + node.exec_in_container(['bash', '-c', add_restore_option.format(bucket)], user='root') + if path: + node.exec_in_container(['bash', '-c', add_restore_option.format(path)], user='root') + + +def get_revision_counter(node, backup_number): + return int(node.exec_in_container(['bash', '-c', 'cat /var/lib/clickhouse/disks/s3/shadow/{}/revision.txt'.format(backup_number)], user='root')) + + +@pytest.fixture(autouse=True) +def drop_table(cluster): + yield + + node_names = ["node", "node_another_bucket", "node_another_bucket_path"] + + for node_name in node_names: + node = cluster.instances[node_name] + node.query("DROP TABLE IF EXISTS s3.test NO DELAY") + + drop_s3_metadata(node) + drop_shadow_information(node) + + buckets = [cluster.minio_bucket, cluster.minio_bucket_2] + for bucket in buckets: + purge_s3(cluster, bucket) + + +def test_full_restore(cluster): + node = cluster.instances["node"] + + create_table(node, "test") + + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-04', 4096, -1))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096, -1))) + + # To ensure parts have merged + node.query("OPTIMIZE TABLE s3.test") + + assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + + node.stop_clickhouse() + drop_s3_metadata(node) + node.start_clickhouse() + + # All data is removed. 
+ assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(0) + + node.stop_clickhouse() + create_restore_file(node) + node.start_clickhouse(10) + + assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + + +def test_restore_another_bucket_path(cluster): + node = cluster.instances["node"] + + create_table(node, "test") + + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-04', 4096, -1))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096, -1))) + + # To ensure parts have merged + node.query("OPTIMIZE TABLE s3.test") + + assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + + node_another_bucket = cluster.instances["node_another_bucket"] + + create_table(node_another_bucket, "test") + + node_another_bucket.stop_clickhouse() + create_restore_file(node_another_bucket, bucket="root") + node_another_bucket.start_clickhouse(10) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + + node_another_bucket_path = cluster.instances["node_another_bucket_path"] + + create_table(node_another_bucket_path, "test") + + node_another_bucket_path.stop_clickhouse() + create_restore_file(node_another_bucket_path, bucket="root2", path="data") + node_another_bucket_path.start_clickhouse(10) + + assert node_another_bucket_path.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node_another_bucket_path.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + + +def test_restore_different_revisions(cluster): + node = cluster.instances["node"] + + create_table(node, "test") + + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-04', 4096, -1))) + + node.query("ALTER TABLE s3.test FREEZE") + revision1 = get_revision_counter(node, 1) + + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-05', 4096, -1))) + + node.query("ALTER TABLE s3.test FREEZE") + revision2 = get_revision_counter(node, 2) + + # To ensure parts have merged + node.query("OPTIMIZE TABLE s3.test") + + node.query("ALTER TABLE s3.test FREEZE") + revision3 = get_revision_counter(node, 3) + + assert node.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node.query("SELECT count(*) from system.parts where table = 'test'") == '5\n' + + node_another_bucket = cluster.instances["node_another_bucket"] + + create_table(node_another_bucket, "test") + + # Restore to revision 1 (2 parts). 
+ node_another_bucket.stop_clickhouse() + drop_s3_metadata(node_another_bucket) + purge_s3(cluster, cluster.minio_bucket_2) + create_restore_file(node_another_bucket, revision=revision1, bucket="root") + node_another_bucket.start_clickhouse(10) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node_another_bucket.query("SELECT count(*) from system.parts where table = 'test'") == '2\n' + + # Restore to revision 2 (4 parts). + node_another_bucket.stop_clickhouse() + drop_s3_metadata(node_another_bucket) + purge_s3(cluster, cluster.minio_bucket_2) + create_restore_file(node_another_bucket, revision=revision2, bucket="root") + node_another_bucket.start_clickhouse(10) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node_another_bucket.query("SELECT count(*) from system.parts where table = 'test'") == '4\n' + + # Restore to revision 3 (4 parts + 1 merged). + node_another_bucket.stop_clickhouse() + drop_s3_metadata(node_another_bucket) + purge_s3(cluster, cluster.minio_bucket_2) + create_restore_file(node_another_bucket, revision=revision3, bucket="root") + node_another_bucket.start_clickhouse(10) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 4) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node_another_bucket.query("SELECT count(*) from system.parts where table = 'test'") == '5\n' + + +def test_restore_mutations(cluster): + node = cluster.instances["node"] + + create_table(node, "test") + + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096))) + node.query("INSERT INTO s3.test VALUES {}".format(generate_values('2020-01-03', 4096, -1))) + + node.query("ALTER TABLE s3.test FREEZE") + revision_before_mutation = get_revision_counter(node, 1) + + node.query("ALTER TABLE s3.test UPDATE counter = 1 WHERE 1", settings={"mutations_sync": 2}) + + node.query("ALTER TABLE s3.test FREEZE") + revision_after_mutation = get_revision_counter(node, 2) + + node_another_bucket = cluster.instances["node_another_bucket"] + + create_table(node_another_bucket, "test") + + # Restore to revision before mutation. + node_another_bucket.stop_clickhouse() + drop_s3_metadata(node_another_bucket) + purge_s3(cluster, cluster.minio_bucket_2) + create_restore_file(node_another_bucket, revision=revision_before_mutation, bucket="root") + node_another_bucket.start_clickhouse(10) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node_another_bucket.query("SELECT sum(counter) FROM s3.test FORMAT Values") == "({})".format(0) + + # Restore to revision after mutation. 
+ node_another_bucket.stop_clickhouse() + drop_s3_metadata(node_another_bucket) + purge_s3(cluster, cluster.minio_bucket_2) + create_restore_file(node_another_bucket, revision=revision_after_mutation, bucket="root") + node_another_bucket.start_clickhouse(10) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node_another_bucket.query("SELECT sum(counter) FROM s3.test FORMAT Values") == "({})".format(4096 * 2) + assert node_another_bucket.query("SELECT sum(counter) FROM s3.test WHERE id > 0 FORMAT Values") == "({})".format(4096) + + # Restore to revision in the middle of mutation. + # Unfinished mutation should be completed after table startup. + node_another_bucket.stop_clickhouse() + drop_s3_metadata(node_another_bucket) + purge_s3(cluster, cluster.minio_bucket_2) + revision = (revision_before_mutation + revision_after_mutation) // 2 + create_restore_file(node_another_bucket, revision=revision, bucket="root") + node_another_bucket.start_clickhouse(10) + + # Wait for unfinished mutation completion. + time.sleep(3) + + assert node_another_bucket.query("SELECT count(*) FROM s3.test FORMAT Values") == "({})".format(4096 * 2) + assert node_another_bucket.query("SELECT sum(id) FROM s3.test FORMAT Values") == "({})".format(0) + assert node_another_bucket.query("SELECT sum(counter) FROM s3.test FORMAT Values") == "({})".format(4096 * 2) + assert node_another_bucket.query("SELECT sum(counter) FROM s3.test WHERE id > 0 FORMAT Values") == "({})".format(4096) diff --git a/tests/integration/test_mysql_protocol/test.py b/tests/integration/test_mysql_protocol/test.py index 9532d4b8ba2..7f7d59674bc 100644 --- a/tests/integration/test_mysql_protocol/test.py +++ b/tests/integration/test_mysql_protocol/test.py @@ -217,7 +217,7 @@ def test_mysql_replacement_query(mysql_client, server_address): --password=123 -e "select database();" '''.format(host=server_address, port=server_port), demux=True) assert code == 0 - assert stdout.decode() == 'database()\ndefault\n' + assert stdout.decode() == 'DATABASE()\ndefault\n' code, (stdout, stderr) = mysql_client.exec_run(''' mysql --protocol tcp -h {host} -P {port} default -u default diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 0ec89be9413..6bb6a6ee777 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -262,18 +262,20 @@ def test_sqlite_odbc_cached_dictionary(started_cluster): assert_eq_with_retry(node1, "select dictGetUInt8('sqlite3_odbc_cached', 'Z', toUInt64(1))", "12") -def test_postgres_odbc_hached_dictionary_with_schema(started_cluster): +def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster): conn = get_postgres_conn() cursor = conn.cursor() + cursor.execute("truncate table clickhouse.test_table") cursor.execute("insert into clickhouse.test_table values(1, 'hello'),(2, 'world')") node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_hashed") assert_eq_with_retry(node1, "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(1))", "hello") assert_eq_with_retry(node1, "select dictGetString('postgres_odbc_hashed', 'column2', toUInt64(2))", "world") -def test_postgres_odbc_hached_dictionary_no_tty_pipe_overflow(started_cluster): +def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): conn = get_postgres_conn() cursor = 
conn.cursor() + cursor.execute("truncate table clickhouse.test_table") cursor.execute("insert into clickhouse.test_table values(3, 'xxx')") for i in range(100): try: @@ -340,3 +342,25 @@ def test_bridge_dies_with_parent(started_cluster): assert clickhouse_pid is None assert bridge_pid is None + + +def test_odbc_postgres_date_data_type(started_cluster): + conn = get_postgres_conn(); + cursor = conn.cursor() + cursor.execute("CREATE TABLE IF NOT EXISTS clickhouse.test_date (column1 integer, column2 date)") + + cursor.execute("INSERT INTO clickhouse.test_date VALUES (1, '2020-12-01')") + cursor.execute("INSERT INTO clickhouse.test_date VALUES (2, '2020-12-02')") + cursor.execute("INSERT INTO clickhouse.test_date VALUES (3, '2020-12-03')") + conn.commit() + + node1.query( + ''' + CREATE TABLE test_date (column1 UInt64, column2 Date) + ENGINE=ODBC('DSN=postgresql_odbc; Servername=postgre-sql.local', 'clickhouse', 'test_date')''') + + expected = '1\t2020-12-01\n2\t2020-12-02\n3\t2020-12-03\n' + result = node1.query('SELECT * FROM test_date'); + assert(result == expected) + + diff --git a/tests/integration/test_query_deduplication/__init__.py b/tests/integration/test_query_deduplication/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_query_deduplication/configs/deduplication_settings.xml b/tests/integration/test_query_deduplication/configs/deduplication_settings.xml new file mode 100644 index 00000000000..8369c916848 --- /dev/null +++ b/tests/integration/test_query_deduplication/configs/deduplication_settings.xml @@ -0,0 +1,5 @@ + + + 1 + + diff --git a/tests/integration/test_query_deduplication/configs/remote_servers.xml b/tests/integration/test_query_deduplication/configs/remote_servers.xml new file mode 100644 index 00000000000..f12558ca529 --- /dev/null +++ b/tests/integration/test_query_deduplication/configs/remote_servers.xml @@ -0,0 +1,24 @@ + + + + + + node1 + 9000 + + + + + node2 + 9000 + + + + + node3 + 9000 + + + + + diff --git a/tests/integration/test_query_deduplication/test.py b/tests/integration/test_query_deduplication/test.py new file mode 100644 index 00000000000..8d935b98579 --- /dev/null +++ b/tests/integration/test_query_deduplication/test.py @@ -0,0 +1,165 @@ +import uuid + +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +DUPLICATED_UUID = uuid.uuid4() + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + 'node1', + main_configs=['configs/remote_servers.xml', 'configs/deduplication_settings.xml']) + +node2 = cluster.add_instance( + 'node2', + main_configs=['configs/remote_servers.xml', 'configs/deduplication_settings.xml']) + +node3 = cluster.add_instance( + 'node3', + main_configs=['configs/remote_servers.xml', 'configs/deduplication_settings.xml']) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def prepare_node(node, parts_uuid=None): + node.query(""" + CREATE TABLE t(_prefix UInt8 DEFAULT 0, key UInt64, value UInt64) + ENGINE MergeTree() + ORDER BY tuple() + PARTITION BY _prefix + SETTINGS index_granularity = 1 + """) + + node.query(""" + CREATE TABLE d AS t ENGINE=Distributed(test_cluster, default, t) + """) + + # Stop merges while populating test data + node.query("SYSTEM STOP MERGES") + + # Create 5 parts + for i in range(1, 6): + node.query("INSERT INTO t VALUES ({}, {}, {})".format(i, i, i)) + + node.query("DETACH TABLE t") + + if parts_uuid: + for 
part, part_uuid in parts_uuid: + script = """ + echo -n '{}' > /var/lib/clickhouse/data/default/t/{}/uuid.txt + """.format(part_uuid, part) + node.exec_in_container(["bash", "-c", script]) + + # Attach table back + node.query("ATTACH TABLE t") + + # NOTE: + # due to absence of the ability to lock part, need to operate on parts while preventing merges + # node.query("SYSTEM START MERGES") + # node.query("OPTIMIZE TABLE t FINAL") + + print(node.name) + print(node.query("SELECT name, uuid, partition FROM system.parts WHERE table = 't' AND active ORDER BY name")) + + assert '5' == node.query("SELECT count() FROM system.parts WHERE table = 't' AND active").strip() + if parts_uuid: + for part, part_uuid in parts_uuid: + assert '1' == node.query( + "SELECT count() FROM system.parts WHERE table = 't' AND uuid = '{}' AND active".format( + part_uuid)).strip() + + +@pytest.fixture(scope="module") +def prepared_cluster(started_cluster): + print("duplicated UUID: {}".format(DUPLICATED_UUID)) + prepare_node(node1, parts_uuid=[("3_3_3_0", DUPLICATED_UUID)]) + prepare_node(node2, parts_uuid=[("3_3_3_0", DUPLICATED_UUID)]) + prepare_node(node3) + + +def test_virtual_column(prepared_cluster): + # Part containing `key=3` has the same fingerprint on both nodes, + # we expect it to be included only once in the end result; + # the select query uses the virtual column _part_uuid to filter out the part in one shard + expected = """ + 1 2 + 2 2 + 3 1 + 4 2 + 5 2 + """ + assert TSV(expected) == TSV(node1.query(""" + SELECT + key, + count() AS c + FROM d + WHERE ((_shard_num = 1) AND (_part_uuid != '{}')) OR (_shard_num = 2) + GROUP BY key + ORDER BY + key ASC + """.format(DUPLICATED_UUID))) + + +def test_with_deduplication(prepared_cluster): + # Part containing `key=3` has the same fingerprint on both nodes, + # we expect it to be included only once in the end result + expected = """ +1 3 +2 3 +3 2 +4 3 +5 3 +""" + assert TSV(expected) == TSV(node1.query( + "SET allow_experimental_query_deduplication=1; SELECT key, count() c FROM d GROUP BY key ORDER BY key")) + + +def test_no_merge_with_deduplication(prepared_cluster): + # Part containing `key=3` has the same fingerprint on both nodes, + # we expect it to be included only once in the end result.
+ # even with distributed_group_by_no_merge=1 the duplicated part should be excluded from the final result + expected = """ +1 1 +2 1 +3 1 +4 1 +5 1 +1 1 +2 1 +3 1 +4 1 +5 1 +1 1 +2 1 +4 1 +5 1 +""" + assert TSV(expected) == TSV(node1.query("SELECT key, count() c FROM d GROUP BY key ORDER BY key", settings={ + "allow_experimental_query_deduplication": 1, + "distributed_group_by_no_merge": 1, + })) + + +def test_without_deduplication(prepared_cluster): + # Part containing `key=3` has the same fingerprint on both nodes, + # but allow_experimental_query_deduplication is disabled, + # so it will not be excluded + expected = """ +1 3 +2 3 +3 3 +4 3 +5 3 +""" + assert TSV(expected) == TSV(node1.query( + "SET allow_experimental_query_deduplication=0; SELECT key, count() c FROM d GROUP BY key ORDER BY key")) diff --git a/tests/integration/test_quota/configs/users.d/assign_myquota.xml b/tests/integration/test_quota/configs/users.d/assign_myquota_to_default_user.xml similarity index 100% rename from tests/integration/test_quota/configs/users.d/assign_myquota.xml rename to tests/integration/test_quota/configs/users.d/assign_myquota_to_default_user.xml diff --git a/tests/integration/test_quota/configs/users.d/quota.xml b/tests/integration/test_quota/configs/users.d/myquota.xml similarity index 100% rename from tests/integration/test_quota/configs/users.d/quota.xml rename to tests/integration/test_quota/configs/users.d/myquota.xml diff --git a/tests/integration/test_quota/configs/users.d/user_with_no_quota.xml b/tests/integration/test_quota/configs/users.d/user_with_no_quota.xml new file mode 100644 index 00000000000..70f51cfff43 --- /dev/null +++ b/tests/integration/test_quota/configs/users.d/user_with_no_quota.xml @@ -0,0 +1,10 @@ + + + + + + ::/0 + + + + diff --git a/tests/integration/test_quota/normal_limits.xml b/tests/integration/test_quota/normal_limits.xml index b7c3a67b5cc..e32043ef5ec 100644 --- a/tests/integration/test_quota/normal_limits.xml +++ b/tests/integration/test_quota/normal_limits.xml @@ -8,6 +8,8 @@ 1000 + 500 + 500 0 1000 0 diff --git a/tests/integration/test_quota/test.py b/tests/integration/test_quota/test.py index 0614150ee07..353d776c0f3 100644 --- a/tests/integration/test_quota/test.py +++ b/tests/integration/test_quota/test.py @@ -7,9 +7,10 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import assert_eq_with_retry, TSV cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', user_configs=["configs/users.d/assign_myquota.xml", +instance = cluster.add_instance('instance', user_configs=["configs/users.d/assign_myquota_to_default_user.xml", "configs/users.d/drop_default_quota.xml", - "configs/users.d/quota.xml"]) + "configs/users.d/myquota.xml", + "configs/users.d/user_with_no_quota.xml"]) def check_system_quotas(canonical): @@ -28,7 +29,7 @@ def system_quota_limits(canonical): def system_quota_usage(canonical): canonical_tsv = TSV(canonical) - query = "SELECT quota_name, quota_key, duration, queries, max_queries, errors, max_errors, result_rows, max_result_rows," \ + query = "SELECT quota_name, quota_key, duration, queries, max_queries, query_selects, max_query_selects, query_inserts, max_query_inserts, errors, max_errors, result_rows, max_result_rows," \ "result_bytes, max_result_bytes, read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time " \ "FROM system.quota_usage ORDER BY duration" r = TSV(instance.query(query)) @@ -38,7 +39,7 @@ def system_quota_usage(canonical): def system_quotas_usage(canonical): 
canonical_tsv = TSV(canonical) - query = "SELECT quota_name, quota_key, is_current, duration, queries, max_queries, errors, max_errors, result_rows, max_result_rows, " \ + query = "SELECT quota_name, quota_key, is_current, duration, queries, max_queries, query_selects, max_query_selects, query_inserts, max_query_inserts, errors, max_errors, result_rows, max_result_rows, " \ "result_bytes, max_result_bytes, read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time " \ "FROM system.quotas_usage ORDER BY quota_name, quota_key, duration" r = TSV(instance.query(query)) @@ -49,9 +50,11 @@ def system_quotas_usage(canonical): def copy_quota_xml(local_file_name, reload_immediately=True): script_dir = os.path.dirname(os.path.realpath(__file__)) instance.copy_file_to_container(os.path.join(script_dir, local_file_name), - '/etc/clickhouse-server/users.d/quota.xml') + '/etc/clickhouse-server/users.d/myquota.xml') if reload_immediately: - instance.query("SYSTEM RELOAD CONFIG") + # We use the special user 'user_with_no_quota' here because + # we don't want SYSTEM RELOAD CONFIG to mess our quota consuming checks. + instance.query("SYSTEM RELOAD CONFIG", user='user_with_no_quota') @pytest.fixture(scope="module", autouse=True) @@ -71,28 +74,29 @@ def started_cluster(): @pytest.fixture(autouse=True) def reset_quotas_and_usage_info(): try: - yield - finally: instance.query("DROP QUOTA IF EXISTS qA, qB") copy_quota_xml('simpliest.xml') # To reset usage info. copy_quota_xml('normal_limits.xml') + yield + finally: + pass def test_quota_from_users_xml(): check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) - system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) system_quotas_usage( - [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 1, 1000, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]]) instance.query("SELECT COUNT() from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 2, 1000, 0, "\\N", 51, "\\N", 208, "\\N", 50, 1000, 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 0, "\\N", 51, "\\N", 208, "\\N", 50, 1000, 200, "\\N", "\\N"]]) def test_simpliest_quota(): @@ -102,11 +106,11 @@ def test_simpliest_quota(): "['default']", "[]"]]) system_quota_limits("") system_quota_usage( - [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) + [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", "\\N", "\\N", 
"\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) + [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) def test_tracking_quota(): @@ -114,16 +118,16 @@ def test_tracking_quota(): copy_quota_xml('tracking.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) - system_quota_usage([["myQuota", "default", 31556952, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", "\\N"]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 1, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, "\\N", 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 1, "\\N", 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, "\\N", 200, "\\N", "\\N"]]) instance.query("SELECT COUNT() from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 2, "\\N", 0, "\\N", 51, "\\N", 208, "\\N", 50, "\\N", 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 2, "\\N", 2, "\\N", 0, "\\N", 0, "\\N", 51, "\\N", 208, "\\N", 50, "\\N", 200, "\\N", "\\N"]]) def test_exceed_quota(): @@ -131,55 +135,55 @@ def test_exceed_quota(): copy_quota_xml('tiny_limits.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1, 1, 1, "\\N", 1, "\\N", "\\N"]]) - system_quota_usage([["myQuota", "default", 31556952, 0, 1, 0, 1, 0, 1, 0, "\\N", 0, 1, 0, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, "\\N", 0, 1, 0, "\\N", "\\N"]]) assert re.search("Quota.*has\ been\ exceeded", instance.query_and_get_error("SELECT * from test_table")) - system_quota_usage([["myQuota", "default", 31556952, 1, 1, 1, 1, 0, 1, 0, "\\N", 50, 1, 0, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, "\\N", 50, 1, 0, "\\N", "\\N"]]) # Change quota, now the limits are enough to execute queries. 
copy_quota_xml('normal_limits.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) - system_quota_usage([["myQuota", "default", 31556952, 1, 1000, 1, "\\N", 0, "\\N", 0, "\\N", 50, 1000, 0, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 1, "\\N", 0, "\\N", 0, "\\N", 50, 1000, 0, "\\N", "\\N"]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 2, 1000, 1, "\\N", 50, "\\N", 200, "\\N", 100, 1000, 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 1, "\\N", 50, "\\N", 200, "\\N", 100, 1000, 200, "\\N", "\\N"]]) def test_add_remove_interval(): check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) - system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) # Add interval. copy_quota_xml('two_intervals.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952,63113904]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"], - ["myQuota", 63113904, 1, "\\N", "\\N", "\\N", 30000, "\\N", 20000, 120]]) - system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"], - ["myQuota", "default", 63113904, 0, "\\N", 0, "\\N", 0, "\\N", 0, 30000, 0, "\\N", 0, 20000, 120]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", "\\N", "\\N", 1000, "\\N", "\\N"], + ["myQuota", 63113904, 1, "\\N", "\\N", "\\N", "\\N", "\\N", 30000, "\\N", 20000, 120]]) + system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"], + ["myQuota", "default", 63113904, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, 30000, 0, "\\N", 0, 20000, 120]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 1, 1000, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"], - ["myQuota", "default", 63113904, 1, "\\N", 0, "\\N", 50, "\\N", 200, 30000, 50, "\\N", 200, 20000, 120]]) + [["myQuota", "default", 31556952, 1, 1000, 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"], + ["myQuota", "default", 63113904, 1, "\\N", 1, "\\N", 0, "\\N", 0, "\\N", 50, "\\N", 200, 30000, 50, "\\N", 200, 20000, 120]]) # Remove interval. 
copy_quota_xml('normal_limits.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) system_quota_usage( - [["myQuota", "default", 31556952, 1, 1000, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 1, 1000, 1, 500, 0, 500, 0, "\\N", 50, "\\N", 200, "\\N", 50, 1000, 200, "\\N", "\\N"]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", 31556952, 2, 1000, 0, "\\N", 100, "\\N", 400, "\\N", 100, 1000, 400, "\\N", "\\N"]]) + [["myQuota", "default", 31556952, 2, 1000, 2, 500, 0, 500, 0, "\\N", 100, "\\N", 400, "\\N", 100, 1000, 400, "\\N", "\\N"]]) # Remove all intervals. copy_quota_xml('simpliest.xml') @@ -187,26 +191,26 @@ def test_add_remove_interval(): "['default']", "[]"]]) system_quota_limits("") system_quota_usage( - [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) + [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) instance.query("SELECT * from test_table") system_quota_usage( - [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) + [["myQuota", "default", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N"]]) # Add one interval back. copy_quota_xml('normal_limits.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) - system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) def test_add_remove_quota(): check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) system_quotas_usage( - [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) # Add quota. 
copy_quota_xml('two_quotas.xml') @@ -214,19 +218,19 @@ def test_add_remove_quota(): 0, "['default']", "[]"], ["myQuota2", "4590510c-4d13-bf21-ec8a-c2187b092e73", "users.xml", "['client_key','user_name']", "[3600,2629746]", 0, "[]", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"], - ["myQuota2", 3600, 1, "\\N", "\\N", 4000, 400000, 4000, 400000, 60], - ["myQuota2", 2629746, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", 1800]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", "\\N", "\\N", 1000, "\\N", "\\N"], + ["myQuota2", 3600, 1, "\\N", "\\N", "\\N", "\\N", 4000, 400000, 4000, 400000, 60], + ["myQuota2", 2629746, 0, "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", "\\N", 1800]]) system_quotas_usage( - [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) # Drop quota. copy_quota_xml('normal_limits.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) system_quotas_usage( - [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) # Drop all quotas. copy_quota_xml('no_quotas.xml') @@ -238,15 +242,15 @@ def test_add_remove_quota(): copy_quota_xml('normal_limits.xml') check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) system_quotas_usage( - [["myQuota", "default", 1, 31556952, 0, 1000, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) def test_reload_users_xml_by_timer(): check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", "[31556952]", 0, "['default']", "[]"]]) - system_quota_limits([["myQuota", 31556952, 0, 1000, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) time.sleep(1) # The modification time of the 'quota.xml' file should be different, # because config files are reload by timer only when the modification time is changed. 
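The quota expectations above gain two new counter pairs, query_selects/max_query_selects and query_inserts/max_query_inserts, inserted between the queries and errors columns of system.quota_usage and system.quotas_usage, with the 500 limits coming from normal_limits.xml. The literal row lists are easy to get out of sync; below is a hypothetical helper, not part of this change, that builds an expected row from named overrides, with defaults matching the normal_limits.xml case.

# Column order mirrors the SELECT used by system_quota_usage() after this change:
# queries, max_queries, query_selects, max_query_selects, query_inserts, max_query_inserts,
# errors, max_errors, result_rows, max_result_rows, result_bytes, max_result_bytes,
# read_rows, max_read_rows, read_bytes, max_read_bytes, max_execution_time.
USAGE_COLUMNS = [
    ('queries', 0), ('max_queries', 1000),
    ('query_selects', 0), ('max_query_selects', 500),
    ('query_inserts', 0), ('max_query_inserts', 500),
    ('errors', 0), ('max_errors', '\\N'),
    ('result_rows', 0), ('max_result_rows', '\\N'),
    ('result_bytes', 0), ('max_result_bytes', '\\N'),
    ('read_rows', 0), ('max_read_rows', 1000),
    ('read_bytes', 0), ('max_read_bytes', '\\N'),
    ('max_execution_time', '\\N'),
]

def usage_row(quota='myQuota', key='default', duration=31556952, **overrides):
    # Build one expected system.quota_usage row, overriding only the counters that changed.
    values = [overrides.get(name, default) for name, default in USAGE_COLUMNS]
    return [quota, key, duration] + values

# For example, the expected row after a single SELECT over 50 rows / 200 bytes:
print(usage_row(queries=1, query_selects=1, result_rows=50, result_bytes=200,
                read_rows=50, read_bytes=200))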
@@ -255,25 +259,25 @@ def test_reload_users_xml_by_timer(): ["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", ['user_name'], "[31556952]", 0, "['default']", "[]"]]) assert_eq_with_retry(instance, "SELECT * FROM system.quota_limits", - [["myQuota", 31556952, 0, 1, 1, 1, "\\N", 1, "\\N", "\\N"]]) + [["myQuota", 31556952, 0, 1, 1, 1, 1, 1, "\\N", 1, "\\N", "\\N"]]) def test_dcl_introspection(): assert instance.query("SHOW QUOTAS") == "myQuota\n" assert instance.query( - "SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" + "SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, query_selects = 500, query_inserts = 500, read_rows = 1000 TO default\n" assert instance.query( - "SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" + "SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, query_selects = 500, query_inserts = 500, read_rows = 1000 TO default\n" assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t1000\\t0\\t\\\\N\\t.*\\t\\\\N\n", + "myQuota\\tdefault\\t.*\\t31556952\\t0\\t1000\\t0\\t500\\t0\\t500\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t1000\\t0\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("SELECT * from test_table") assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) - expected_access = "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" + expected_access = "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, query_selects = 500, query_inserts = 500, read_rows = 1000 TO default\n" assert expected_access in instance.query("SHOW ACCESS") # Add interval. @@ -282,8 +286,8 @@ def test_dcl_introspection(): assert instance.query( "SHOW CREATE QUOTA") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000, FOR RANDOMIZED INTERVAL 2 year MAX result_bytes = 30000, read_bytes = 20000, execution_time = 120 TO default\n" assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n" - "myQuota\\tdefault\\t.*\\t63113904\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t30000\\t0\\t\\\\N\\t0\\t20000\\t.*\\t120", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n" + "myQuota\\tdefault\\t.*\\t63113904\\t0\\t\\\\N\t0\\t\\\\N\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t30000\\t0\\t\\\\N\\t0\\t20000\\t.*\\t120", instance.query("SHOW QUOTA")) # Drop interval, add quota. 
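The SHOW QUOTA checks in this test match whole tab-separated rows with long hand-escaped regular expressions, which is where the two extra columns have to be threaded through in each pattern. A hypothetical convenience, not used by the PR itself, assembles such a pattern from per-column fragments and checks it against a made-up SHOW QUOTA line (the timestamps are invented for the example).

import re

def quota_row_pattern(*columns):
    # Join per-column regex fragments with a literal tab, the way SHOW QUOTA prints rows.
    return '\\t'.join(str(column) for column in columns) + '\n'

# Roughly one of the expected rows above: wildcards for the timestamps, '\N' for NULL counters.
pattern = quota_row_pattern('myQuota', 'default', '.*', 31556952, 1, 1000, 1, 500, 0, 500,
                            0, '\\\\N', 50, '\\\\N', 200, '\\\\N', 50, 1000, 200, '\\\\N',
                            '.*', '\\\\N')
sample = '\t'.join(['myQuota', 'default', '2021-02-07 00:00:00', '31556952', '1', '1000',
                    '1', '500', '0', '500', '0', '\\N', '50', '\\N', '200', '\\N',
                    '50', '1000', '200', '\\N', '2021-02-07 00:00:00', '\\N']) + '\n'
assert re.match(pattern, sample)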
@@ -297,7 +301,7 @@ def test_dcl_introspection(): "SHOW CREATE QUOTAS") == "CREATE QUOTA myQuota KEYED BY user_name FOR INTERVAL 1 year MAX queries = 1000, read_rows = 1000 TO default\n" \ "CREATE QUOTA myQuota2 KEYED BY client_key, user_name FOR RANDOMIZED INTERVAL 1 hour MAX result_rows = 4000, result_bytes = 400000, read_rows = 4000, read_bytes = 400000, execution_time = 60, FOR INTERVAL 1 month MAX execution_time = 1800\n" assert re.match( - "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t1000\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) # Drop all quotas. @@ -315,12 +319,12 @@ def test_dcl_management(): assert instance.query( "SHOW CREATE QUOTA qA") == "CREATE QUOTA qA FOR INTERVAL 5 quarter MAX queries = 123 TO default\n" assert re.match( - "qA\\t\\t.*\\t39446190\\t0\\t123\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t39446190\\t0\\t123\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("SELECT * from test_table") assert re.match( - "qA\\t\\t.*\\t39446190\\t1\\t123\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t39446190\\t1\\t123\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query( @@ -328,37 +332,37 @@ def test_dcl_management(): assert instance.query( "SHOW CREATE QUOTA qA") == "CREATE QUOTA qA FOR INTERVAL 30 minute MAX execution_time = 0.5, FOR INTERVAL 5 quarter MAX queries = 321, errors = 10 TO default\n" assert re.match( - "qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\n" - "qA\\t\\t.*\\t39446190\\t1\\t321\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t1800\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t0.5\n" + "qA\\t\\t.*\\t39446190\\t1\\t321\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("SELECT * from test_table") assert re.match( - "qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\n" - "qA\\t\\t.*\\t39446190\\t2\\t321\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t1800\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t0.5\n" + "qA\\t\\t.*\\t39446190\\t2\\t321\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t10\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query( "ALTER QUOTA qA FOR INTERVAL 15 MONTH NO LIMITS, FOR RANDOMIZED INTERVAL 16 MONTH TRACKING ONLY, FOR INTERVAL 1800 SECOND NO LIMITS") assert re.match( - "qA\\t\\t.*\\t42075936\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t42075936\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("SELECT * from test_table") assert re.match( - 
"qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "qA\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("ALTER QUOTA qA RENAME TO qB") assert instance.query( "SHOW CREATE QUOTA qB") == "CREATE QUOTA qB FOR RANDOMIZED INTERVAL 16 month TRACKING ONLY TO default\n" assert re.match( - "qB\\t\\t.*\\t42075936\\t1\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", + "qB\\t\\t.*\\t42075936\\t1\\t\\\\N\\t1\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t50\\t\\\\N\\t200\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("SELECT * from test_table") assert re.match( - "qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n", + "qB\\t\\t.*\\t42075936\\t2\\t\\\\N\\t2\\t\\\\N\\t0\\t\\\\N\\t0\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t100\\t\\\\N\\t400\\t\\\\N\\t.*\\t\\\\N\n", instance.query("SHOW QUOTA")) instance.query("DROP QUOTA qB") @@ -367,3 +371,22 @@ def test_dcl_management(): def test_users_xml_is_readonly(): assert re.search("storage is readonly", instance.query_and_get_error("DROP QUOTA myQuota")) + +def test_query_inserts(): + check_system_quotas([["myQuota", "e651da9c-a748-8703-061a-7e5e5096dae7", "users.xml", "['user_name']", [31556952], + 0, "['default']", "[]"]]) + system_quota_limits([["myQuota", 31556952, 0, 1000, 500, 500, "\\N", "\\N", "\\N", 1000, "\\N", "\\N"]]) + system_quota_usage([["myQuota", "default", 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + system_quotas_usage( + [["myQuota", "default", 1, 31556952, 0, 1000, 0, 500, 0, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + + instance.query("INSERT INTO test_table values(1)") + system_quota_usage( + [["myQuota", "default", 31556952, 1, 1000, 0, 500, 1, 500, 0, "\\N", 0, "\\N", 0, "\\N", 0, 1000, 0, "\\N", "\\N"]]) + +def test_consumption_show_tables_quota(): + instance.query("SHOW TABLES") + + assert re.match( + "myQuota\\tdefault\\t.*\\t31556952\\t1\\t1000\\t1\\t500\\t0\\t500\\t0\\t\\\\N\\t1\\t\\\\N\\t19\\t\\\\N\\t1\\t1000\\t35\\t\\\\N\\t.*\\t\\\\N\n", + instance.query("SHOW QUOTA")) diff --git a/tests/integration/test_quota/tiny_limits.xml b/tests/integration/test_quota/tiny_limits.xml index 3ab8858738a..4797c360ddd 100644 --- a/tests/integration/test_quota/tiny_limits.xml +++ b/tests/integration/test_quota/tiny_limits.xml @@ -8,6 +8,8 @@ 1 + 1 + 1 1 1 1 diff --git a/tests/integration/test_quota/tracking.xml b/tests/integration/test_quota/tracking.xml index 47e12bf8005..c5e7c993edc 100644 --- a/tests/integration/test_quota/tracking.xml +++ b/tests/integration/test_quota/tracking.xml @@ -8,6 +8,8 @@ 0 + 0 + 0 0 0 0 diff --git a/tests/integration/test_read_temporary_tables_on_failure/test.py b/tests/integration/test_read_temporary_tables_on_failure/test.py index f7df52f67e9..e62c7c9eaec 100644 --- a/tests/integration/test_read_temporary_tables_on_failure/test.py +++ b/tests/integration/test_read_temporary_tables_on_failure/test.py @@ -19,7 +19,7 @@ def start_cluster(): def test_different_versions(start_cluster): with pytest.raises(QueryTimeoutExceedException): - node.query("SELECT sleep(3)", timeout=1) + node.query("SELECT sleepEachRow(3) FROM numbers(10)", timeout=5) with pytest.raises(QueryRuntimeException): node.query("SELECT 1", 
settings={'max_concurrent_queries_for_user': 1}) assert node.contains_in_log('Too many simultaneous queries for user') diff --git a/tests/integration/test_replicated_database/__init__.py b/tests/integration/test_replicated_database/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_replicated_database/configs/config.xml b/tests/integration/test_replicated_database/configs/config.xml new file mode 100644 index 00000000000..ebceee3aa5c --- /dev/null +++ b/tests/integration/test_replicated_database/configs/config.xml @@ -0,0 +1,34 @@ + + 10 + + + + + true + + main_node + 9000 + + + dummy_node + 9000 + + + competing_node + 9000 + + + + true + + snapshotting_node + 9000 + + + snapshot_recovering_node + 9000 + + + + + diff --git a/tests/integration/test_replicated_database/configs/settings.xml b/tests/integration/test_replicated_database/configs/settings.xml new file mode 100644 index 00000000000..e0f7e8691e6 --- /dev/null +++ b/tests/integration/test_replicated_database/configs/settings.xml @@ -0,0 +1,12 @@ + + + + 1 + + + + + default + + + diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py new file mode 100644 index 00000000000..99e7d6077f8 --- /dev/null +++ b/tests/integration/test_replicated_database/test.py @@ -0,0 +1,278 @@ +import time +import re +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry, assert_logs_contain +from helpers.network import PartitionManager + +cluster = ClickHouseCluster(__file__) + +main_node = cluster.add_instance('main_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 1}) +dummy_node = cluster.add_instance('dummy_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, stay_alive=True, macros={"shard": 1, "replica": 2}) +competing_node = cluster.add_instance('competing_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, macros={"shard": 1, "replica": 3}) +snapshotting_node = cluster.add_instance('snapshotting_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 1}) +snapshot_recovering_node = cluster.add_instance('snapshot_recovering_node', main_configs=['configs/config.xml'], user_configs=['configs/settings.xml'], with_zookeeper=True, macros={"shard": 2, "replica": 2}) + +all_nodes = [main_node, dummy_node, competing_node, snapshotting_node, snapshot_recovering_node] + +uuid_regex = re.compile("[0-9a-f]{8}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{4}\-[0-9a-f]{12}") +def assert_create_query(nodes, table_name, expected): + replace_uuid = lambda x: re.sub(uuid_regex, "uuid", x) + query = "show create table {}".format(table_name) + for node in nodes: + assert_eq_with_retry(node, query, expected, get_result=replace_uuid) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');") + dummy_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica2');") + yield cluster + + finally: + cluster.shutdown() + +def test_create_replicated_table(started_cluster): + assert "Old syntax is not allowed" in \ + main_node.query_and_get_error("CREATE 
TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/test/tmp', 'r', d, k, 8192);") + + main_node.query("CREATE TABLE testdb.replicated_table (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree ORDER BY k PARTITION BY toYYYYMM(d);") + + expected = "CREATE TABLE testdb.replicated_table\\n(\\n `d` Date,\\n `k` UInt64,\\n `i32` Int32\\n)\\n" \ + "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')\\n" \ + "PARTITION BY toYYYYMM(d)\\nORDER BY k\\nSETTINGS index_granularity = 8192" + assert_create_query([main_node, dummy_node], "testdb.replicated_table", expected) + # assert without replacing uuid + assert main_node.query("show create testdb.replicated_table") == dummy_node.query("show create testdb.replicated_table") + +@pytest.mark.parametrize("engine", ['MergeTree', 'ReplicatedMergeTree']) +def test_simple_alter_table(started_cluster, engine): + # test_simple_alter_table + name = "testdb.alter_test_{}".format(engine) + main_node.query("CREATE TABLE {} " + "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) " + "ENGINE = {} PARTITION BY StartDate ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID);".format(name, engine)) + main_node.query("ALTER TABLE {} ADD COLUMN Added0 UInt32;".format(name)) + main_node.query("ALTER TABLE {} ADD COLUMN Added2 UInt32;".format(name)) + main_node.query("ALTER TABLE {} ADD COLUMN Added1 UInt32 AFTER Added0;".format(name)) + main_node.query("ALTER TABLE {} ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;".format(name)) + main_node.query("ALTER TABLE {} ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;".format(name)) + main_node.query("ALTER TABLE {} ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;".format(name)) + + full_engine = engine if not "Replicated" in engine else engine + "(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')" + expected = "CREATE TABLE {}\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" \ + " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n" \ + " `ToDrop` UInt32,\\n `Added0` UInt32,\\n `Added1` UInt32,\\n `Added2` UInt32,\\n" \ + " `AddedNested1.A` Array(UInt32),\\n `AddedNested1.B` Array(UInt64),\\n `AddedNested1.C` Array(String),\\n" \ + " `AddedNested2.A` Array(UInt32),\\n `AddedNested2.B` Array(UInt64)\\n)\\n" \ + "ENGINE = {}\\nPARTITION BY StartDate\\nORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)\\n" \ + "SETTINGS index_granularity = 8192".format(name, full_engine) + + assert_create_query([main_node, dummy_node], name, expected) + + # test_create_replica_after_delay + competing_node.query("CREATE DATABASE IF NOT EXISTS testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica3');") + + name = "testdb.alter_test_{}".format(engine) + main_node.query("ALTER TABLE {} ADD COLUMN Added3 UInt32;".format(name)) + main_node.query("ALTER TABLE {} DROP COLUMN AddedNested1;".format(name)) + main_node.query("ALTER TABLE {} RENAME COLUMN Added1 TO AddedNested1;".format(name)) + + full_engine = engine if not "Replicated" in engine else engine + "(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')" + expected = "CREATE TABLE {}\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" \ + " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n" \ + " `ToDrop` UInt32,\\n `Added0` UInt32,\\n 
`AddedNested1` UInt32,\\n `Added2` UInt32,\\n" \ + " `AddedNested2.A` Array(UInt32),\\n `AddedNested2.B` Array(UInt64),\\n `Added3` UInt32\\n)\\n" \ + "ENGINE = {}\\nPARTITION BY StartDate\\nORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)\\n" \ + "SETTINGS index_granularity = 8192".format(name, full_engine) + + assert_create_query([main_node, dummy_node, competing_node], name, expected) + + +def test_alters_from_different_replicas(started_cluster): + # test_alters_from_different_replicas + competing_node.query("CREATE DATABASE IF NOT EXISTS testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica3');") + + main_node.query("CREATE TABLE testdb.concurrent_test " + "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) " + "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192);") + + main_node.query("CREATE TABLE testdb.dist AS testdb.concurrent_test ENGINE = Distributed(cluster, testdb, concurrent_test, CounterID)") + + dummy_node.stop_clickhouse(kill=True) + + settings = {"distributed_ddl_task_timeout": 10} + assert "There are 1 unfinished hosts (0 of them are currently active)" in \ + competing_node.query_and_get_error("ALTER TABLE testdb.concurrent_test ADD COLUMN Added0 UInt32;", settings=settings) + dummy_node.start_clickhouse() + main_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added2 UInt32;") + competing_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN Added1 UInt32 AFTER Added0;") + main_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1 Nested(A UInt32, B UInt64) AFTER Added2;") + competing_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested1.C Array(String) AFTER AddedNested1.B;") + main_node.query("ALTER TABLE testdb.concurrent_test ADD COLUMN AddedNested2 Nested(A UInt32, B UInt64) AFTER AddedNested1;") + + expected = "CREATE TABLE testdb.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" \ + " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n `ToDrop` UInt32,\\n" \ + " `Added0` UInt32,\\n `Added1` UInt32,\\n `Added2` UInt32,\\n `AddedNested1.A` Array(UInt32),\\n" \ + " `AddedNested1.B` Array(UInt64),\\n `AddedNested1.C` Array(String),\\n `AddedNested2.A` Array(UInt32),\\n" \ + " `AddedNested2.B` Array(UInt64)\\n)\\n" \ + "ENGINE = MergeTree(StartDate, intHash32(UserID), (CounterID, StartDate, intHash32(UserID), VisitID), 8192)" + + assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected) + + # test_create_replica_after_delay + main_node.query("DROP TABLE testdb.concurrent_test") + main_node.query("CREATE TABLE testdb.concurrent_test " + "(CounterID UInt32, StartDate Date, UserID UInt32, VisitID UInt32, NestedColumn Nested(A UInt8, S String), ToDrop UInt32) " + "ENGINE = ReplicatedMergeTree ORDER BY CounterID;") + + expected = "CREATE TABLE testdb.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" \ + " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n `ToDrop` UInt32\\n)\\n" \ + "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192" + + assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected) + + main_node.query("INSERT INTO testdb.dist (CounterID, StartDate, UserID) SELECT 
number, addDays(toDate('2020-02-02'), number), intHash32(number) FROM numbers(10)") + + # test_replica_restart + main_node.restart_clickhouse() + + expected = "CREATE TABLE testdb.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" \ + " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n `ToDrop` UInt32\\n)\\n" \ + "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192" + + + # test_snapshot_and_snapshot_recover + snapshotting_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard2', 'replica1');") + snapshot_recovering_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard2', 'replica2');") + assert_create_query(all_nodes, "testdb.concurrent_test", expected) + + main_node.query("SYSTEM FLUSH DISTRIBUTED testdb.dist") + main_node.query("ALTER TABLE testdb.concurrent_test UPDATE StartDate = addYears(StartDate, 1) WHERE 1") + res = main_node.query("ALTER TABLE testdb.concurrent_test DELETE WHERE UserID % 2") + assert "shard1|replica1" in res and "shard1|replica2" in res and "shard1|replica3" in res + assert "shard2|replica1" in res and "shard2|replica2" in res + + expected = "1\t1\tmain_node\n" \ + "1\t2\tdummy_node\n" \ + "1\t3\tcompeting_node\n" \ + "2\t1\tsnapshotting_node\n" \ + "2\t2\tsnapshot_recovering_node\n" + assert main_node.query("SELECT shard_num, replica_num, host_name FROM system.clusters WHERE cluster='testdb'") == expected + + # test_drop_and_create_replica + main_node.query("DROP DATABASE testdb SYNC") + main_node.query("CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');") + + expected = "CREATE TABLE testdb.concurrent_test\\n(\\n `CounterID` UInt32,\\n `StartDate` Date,\\n `UserID` UInt32,\\n" \ + " `VisitID` UInt32,\\n `NestedColumn.A` Array(UInt8),\\n `NestedColumn.S` Array(String),\\n `ToDrop` UInt32\\n)\\n" \ + "ENGINE = ReplicatedMergeTree(\\'/clickhouse/tables/uuid/{shard}\\', \\'{replica}\\')\\nORDER BY CounterID\\nSETTINGS index_granularity = 8192" + + assert_create_query([main_node, competing_node], "testdb.concurrent_test", expected) + assert_create_query(all_nodes, "testdb.concurrent_test", expected) + + for node in all_nodes: + node.query("SYSTEM SYNC REPLICA testdb.concurrent_test") + + expected = "0\t2021-02-02\t4249604106\n" \ + "1\t2021-02-03\t1343103100\n" \ + "4\t2021-02-06\t3902320246\n" \ + "7\t2021-02-09\t3844986530\n" \ + "9\t2021-02-11\t1241149650\n" + + assert_eq_with_retry(dummy_node, "SELECT CounterID, StartDate, UserID FROM testdb.dist ORDER BY CounterID", expected) + +def test_recover_staled_replica(started_cluster): + main_node.query("CREATE DATABASE recover ENGINE = Replicated('/clickhouse/databases/recover', 'shard1', 'replica1');") + started_cluster.get_kazoo_client('zoo1').set('/clickhouse/databases/recover/logs_to_keep', b'10') + dummy_node.query("CREATE DATABASE recover ENGINE = Replicated('/clickhouse/databases/recover', 'shard1', 'replica2');") + + settings = {"distributed_ddl_task_timeout": 0} + main_node.query("CREATE TABLE recover.t1 (n int) ENGINE=Memory", settings=settings) + dummy_node.query("CREATE TABLE recover.t2 (s String) ENGINE=Memory", settings=settings) + main_node.query("CREATE TABLE recover.mt1 (n int) ENGINE=MergeTree order by n", settings=settings) + dummy_node.query("CREATE TABLE recover.mt2 (n int) ENGINE=MergeTree order by n", 
settings=settings) + main_node.query("CREATE TABLE recover.rmt1 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings) + dummy_node.query("CREATE TABLE recover.rmt2 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings) + main_node.query("CREATE TABLE recover.rmt3 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings) + dummy_node.query("CREATE TABLE recover.rmt5 (n int) ENGINE=ReplicatedMergeTree order by n", settings=settings) + main_node.query("CREATE DICTIONARY recover.d1 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt1' PASSWORD '' DB 'recover')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())") + dummy_node.query("CREATE DICTIONARY recover.d2 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt2' PASSWORD '' DB 'recover')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT())") + + for table in ['t1', 't2', 'mt1', 'mt2', 'rmt1', 'rmt2', 'rmt3', 'rmt5']: + main_node.query("INSERT INTO recover.{} VALUES (42)".format(table)) + for table in ['t1', 't2', 'mt1', 'mt2']: + dummy_node.query("INSERT INTO recover.{} VALUES (42)".format(table)) + for table in ['rmt1', 'rmt2', 'rmt3', 'rmt5']: + main_node.query("SYSTEM SYNC REPLICA recover.{}".format(table)) + + with PartitionManager() as pm: + pm.drop_instance_zk_connections(dummy_node) + dummy_node.query_and_get_error("RENAME TABLE recover.t1 TO recover.m1") + main_node.query("RENAME TABLE recover.t1 TO recover.m1", settings=settings) + main_node.query("ALTER TABLE recover.mt1 ADD COLUMN m int", settings=settings) + main_node.query("ALTER TABLE recover.rmt1 ADD COLUMN m int", settings=settings) + main_node.query("RENAME TABLE recover.rmt3 TO recover.rmt4", settings=settings) + main_node.query("DROP TABLE recover.rmt5", settings=settings) + main_node.query("DROP DICTIONARY recover.d2", settings=settings) + main_node.query("CREATE DICTIONARY recover.d2 (n int DEFAULT 0, m int DEFAULT 1) PRIMARY KEY n SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 USER 'default' TABLE 'rmt1' PASSWORD '' DB 'recover')) LIFETIME(MIN 1 MAX 10) LAYOUT(FLAT());", settings=settings) + + main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings) + main_node.query("DROP TABLE recover.tmp", settings=settings) + main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings) + main_node.query("DROP TABLE recover.tmp", settings=settings) + main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings) + main_node.query("DROP TABLE recover.tmp", settings=settings) + main_node.query("CREATE TABLE recover.tmp AS recover.m1", settings=settings) + + assert main_node.query("SELECT name FROM system.tables WHERE database='recover' ORDER BY name") == "d1\nd2\nm1\nmt1\nmt2\nrmt1\nrmt2\nrmt4\nt2\ntmp\n" + query = "SELECT name, uuid, create_table_query FROM system.tables WHERE database='recover' ORDER BY name" + expected = main_node.query(query) + assert_eq_with_retry(dummy_node, query, expected) + + for table in ['m1', 't2', 'mt1', 'mt2', 'rmt1', 'rmt2', 'rmt4', 'd1', 'd2']: + assert main_node.query("SELECT (*,).1 FROM recover.{}".format(table)) == "42\n" + for table in ['t2', 'rmt1', 'rmt2', 'rmt4', 'd1', 'd2', 'mt2']: + assert dummy_node.query("SELECT (*,).1 FROM recover.{}".format(table)) == "42\n" + for table in ['m1', 'mt1']: + assert dummy_node.query("SELECT count() FROM recover.{}".format(table)) == "0\n" + + assert dummy_node.query("SELECT count() FROM system.tables WHERE 
database='recover_broken_tables'") == "2\n" + table = dummy_node.query("SHOW TABLES FROM recover_broken_tables LIKE 'mt1_26_%'").strip() + assert dummy_node.query("SELECT (*,).1 FROM recover_broken_tables.{}".format(table)) == "42\n" + table = dummy_node.query("SHOW TABLES FROM recover_broken_tables LIKE 'rmt5_26_%'").strip() + assert dummy_node.query("SELECT (*,).1 FROM recover_broken_tables.{}".format(table)) == "42\n" + + expected = "Cleaned 4 outdated objects: dropped 1 dictionaries and 1 tables, moved 2 tables" + assert_logs_contain(dummy_node, expected) + + dummy_node.query("DROP TABLE recover.tmp") + assert_eq_with_retry(main_node, "SELECT count() FROM system.tables WHERE database='recover' AND name='tmp'", "0\n") + +def test_startup_without_zk(started_cluster): + main_node.query("DROP DATABASE IF EXISTS testdb SYNC") + main_node.query("DROP DATABASE IF EXISTS recover SYNC") + with PartitionManager() as pm: + pm.drop_instance_zk_connections(main_node) + err = main_node.query_and_get_error("CREATE DATABASE startup ENGINE = Replicated('/clickhouse/databases/startup', 'shard1', 'replica1');") + assert "ZooKeeper" in err + main_node.query("CREATE DATABASE startup ENGINE = Replicated('/clickhouse/databases/startup', 'shard1', 'replica1');") + #main_node.query("CREATE TABLE startup.rmt (n int) ENGINE=ReplicatedMergeTree order by n") + main_node.query("CREATE TABLE startup.rmt (n int) ENGINE=MergeTree order by n") + main_node.query("INSERT INTO startup.rmt VALUES (42)") + with PartitionManager() as pm: + pm.drop_instance_zk_connections(main_node) + main_node.restart_clickhouse(stop_start_wait_sec=30) + assert main_node.query("SELECT (*,).1 FROM startup.rmt") == "42\n" + + for _ in range(10): + try: + main_node.query("CREATE TABLE startup.m (n int) ENGINE=Memory") + break + except: + time.sleep(1) + + main_node.query("EXCHANGE TABLES startup.rmt AND startup.m") + assert main_node.query("SELECT (*,).1 FROM startup.m") == "42\n" diff --git a/tests/integration/test_send_crash_reports/test.py b/tests/integration/test_send_crash_reports/test.py index a3c35ca1537..65d49637b13 100644 --- a/tests/integration/test_send_crash_reports/test.py +++ b/tests/integration/test_send_crash_reports/test.py @@ -24,14 +24,17 @@ def started_node(): def test_send_segfault(started_node, ): + if started_node.is_built_with_thread_sanitizer(): + pytest.skip("doesn't fit in timeouts for stacktrace generation") + started_node.copy_file_to_container(os.path.join(SCRIPT_DIR, "fake_sentry_server.py"), "/fake_sentry_server.py") started_node.exec_in_container(["bash", "-c", "python3 /fake_sentry_server.py > /fake_sentry_server.log 2>&1"], detach=True, user="root") - time.sleep(0.5) + time.sleep(1) started_node.exec_in_container(["bash", "-c", "pkill -11 clickhouse"], user="root") result = None for attempt in range(1, 6): - time.sleep(0.25 * attempt) + time.sleep(attempt) result = started_node.exec_in_container(['cat', fake_sentry_server.RESULT_PATH], user='root') if result == 'OK': break diff --git a/tests/integration/test_settings_profile/test.py b/tests/integration/test_settings_profile/test.py index 3ceef9f25cf..1945875bf53 100644 --- a/tests/integration/test_settings_profile/test.py +++ b/tests/integration/test_settings_profile/test.py @@ -46,7 +46,7 @@ def reset_after_test(): def test_smoke(): - # Set settings and constraints via CREATE SETTINGS PROFILE ... TO user + # Set settings and constraints via CREATE SETTINGS PROFILE ... 
TO user instance.query( "CREATE SETTINGS PROFILE xyz SETTINGS max_memory_usage = 100000001 MIN 90000000 MAX 110000000 TO robin") assert instance.query( @@ -194,13 +194,13 @@ def test_show_profiles(): assert instance.query("SHOW CREATE PROFILE xyz") == "CREATE SETTINGS PROFILE xyz\n" assert instance.query( - "SHOW CREATE SETTINGS PROFILE default") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n" + "SHOW CREATE SETTINGS PROFILE default") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = \\'random\\'\n" assert instance.query( - "SHOW CREATE PROFILES") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n" \ + "SHOW CREATE PROFILES") == "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = \\'random\\'\n" \ "CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n" \ "CREATE SETTINGS PROFILE xyz\n" - expected_access = "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, use_uncompressed_cache = 0, load_balancing = \\'random\\'\n" \ + expected_access = "CREATE SETTINGS PROFILE default SETTINGS max_memory_usage = 10000000000, load_balancing = \\'random\\'\n" \ "CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1\n" \ "CREATE SETTINGS PROFILE xyz\n" assert expected_access in instance.query("SHOW ACCESS") @@ -210,7 +210,7 @@ def test_allow_ddl(): assert "it's necessary to have grant" in instance.query_and_get_error("CREATE TABLE tbl(a Int32) ENGINE=Log", user="robin") assert "it's necessary to have grant" in instance.query_and_get_error("GRANT CREATE ON tbl TO robin", user="robin") assert "DDL queries are prohibited" in instance.query_and_get_error("CREATE TABLE tbl(a Int32) ENGINE=Log", settings={"allow_ddl": 0}) - + instance.query("GRANT CREATE ON tbl TO robin") instance.query("CREATE TABLE tbl(a Int32) ENGINE=Log", user="robin") instance.query("DROP TABLE tbl") diff --git a/tests/integration/test_storage_kafka/configs/kafka_macros.xml b/tests/integration/test_storage_kafka/configs/kafka_macros.xml deleted file mode 100644 index 7f6cfb5eb1f..00000000000 --- a/tests/integration/test_storage_kafka/configs/kafka_macros.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - kafka1 - old - old - - new - new - instance - JSONEachRow - - \ No newline at end of file diff --git a/tests/integration/test_storage_kafka/test.py b/tests/integration/test_storage_kafka/test.py index 1f31cbdbbc7..5f2726832cc 100644 --- a/tests/integration/test_storage_kafka/test.py +++ b/tests/integration/test_storage_kafka/test.py @@ -39,9 +39,16 @@ from . 
import social_pb2 cluster = ClickHouseCluster(__file__) instance = cluster.add_instance('instance', - main_configs=['configs/kafka.xml', 'configs/log_conf.xml', 'configs/kafka_macros.xml'], + main_configs=['configs/kafka.xml', 'configs/log_conf.xml'], with_kafka=True, with_zookeeper=True, + macros={"kafka_broker":"kafka1", + "kafka_topic_old":"old", + "kafka_group_name_old":"old", + "kafka_topic_new":"new", + "kafka_group_name_new":"new", + "kafka_client_id":"instance", + "kafka_format_json_each_row":"JSONEachRow"}, clickhouse_path_dir='clickhouse_path') kafka_id = '' @@ -1732,6 +1739,11 @@ def test_kafka_produce_key_timestamp(kafka_cluster): @pytest.mark.timeout(600) def test_kafka_flush_by_time(kafka_cluster): + admin_client = KafkaAdminClient(bootstrap_servers="localhost:9092") + topic_list = [] + topic_list.append(NewTopic(name="flush_by_time", num_partitions=1, replication_factor=1)) + admin_client.create_topics(new_topics=topic_list, validate_only=False) + instance.query(''' DROP TABLE IF EXISTS test.view; DROP TABLE IF EXISTS test.consumer; @@ -1771,7 +1783,7 @@ def test_kafka_flush_by_time(kafka_cluster): time.sleep(18) - result = instance.query('SELECT uniqExact(ts) = 2, count() > 15 FROM test.view') + result = instance.query('SELECT uniqExact(ts) = 2, count() >= 15 FROM test.view') cancel.set() kafka_thread.join() @@ -2357,9 +2369,9 @@ def test_premature_flush_on_eof(kafka_cluster): ''') # messages created here will be consumed immedeately after MV creation - # reaching topic EOF. + # reaching topic EOF. # But we should not do flush immedeately after reaching EOF, because - # next poll can return more data, and we should respect kafka_flush_interval_ms + # next poll can return more data, and we should respect kafka_flush_interval_ms # and try to form bigger block messages = [json.dumps({'key': j + 1, 'value': j + 1}) for j in range(1)] kafka_produce('premature_flush_on_eof', messages) @@ -2379,11 +2391,11 @@ def test_premature_flush_on_eof(kafka_cluster): # all subscriptions/assignments done during select, so it start sending data to test.destination # immediately after creation of MV - + time.sleep(1.5) # that sleep is needed to ensure that first poll finished, and at least one 'empty' polls happened. # Empty poll before the fix were leading to premature flush. 
-    # TODO: wait for messages in log: "Polled batch of 1 messages", followed by "Stalled"
-
+    # TODO: wait for messages in log: "Polled batch of 1 messages", followed by "Stalled"
+
     # produce more messages after delay
     kafka_produce('premature_flush_on_eof', messages)
diff --git a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh
index 38f098ae1e1..971491d4053 100755
--- a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh
+++ b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh
@@ -34,11 +34,11 @@ cat >> /usr/local/hadoop/etc/hadoop/core-site.xml << EOF
         fs.defaultFS
-        hdfs://kerberizedhdfs1:9000
+        hdfs://kerberizedhdfs1:9010
         fs.default.name
-        hdfs://kerberizedhdfs1:9000
+        hdfs://kerberizedhdfs1:9010
+
+
+
+
+        SELECT * FROM
+          (SELECT EventTime,
+            count(*) OVER (ORDER BY EventTime ASC
+              RANGE BETWEEN 10 PRECEDING AND 10 FOLLOWING) AS c
+          FROM hits_10m_single)
+        FORMAT Null
+
+
+
+
+        select * from
+          (select EventTime,
+            count(*) over (partition by
+              floor((toUInt32(EventTime) + 10 + 1) / 20)) as c
+          from hits_10m_single)
+        format Null
+
+
+
+
+        select
+            min(number) over w,
+            count(*) over w,
+            max(number) over w
+        from
+            (select number, intDiv(number, 1111) p, mod(number, 111) o
+            from numbers(10000000)) t
+        window w as (partition by p order by o)
+        format Null
+
+
+
+        select
+            first_value(number) over w,
+            dense_rank() over w
+        from
+            (select number, intDiv(number, 1111) p, mod(number, 111) o
+            from numbers(10000000)) t
+        window w as (partition by p order by o)
+        format Null
+
+
diff --git a/tests/queries/0_stateless/00011_array_join_alias.sql b/tests/queries/0_stateless/00011_array_join_alias.sql
index 228038c1509..5eafeddb8fe 100644
--- a/tests/queries/0_stateless/00011_array_join_alias.sql
+++ b/tests/queries/0_stateless/00011_array_join_alias.sql
@@ -1 +1,2 @@
-SELECT x, a FROM (SELECT arrayJoin(['Hello', 'Goodbye']) AS x, [1, 2, 3] AS arr) ARRAY JOIN arr AS a
+SELECT x, a FROM (SELECT arrayJoin(['Hello', 'Goodbye']) AS x, [1, 2, 3] AS arr) ARRAY JOIN; -- { serverError 42 }
+SELECT x, a FROM (SELECT arrayJoin(['Hello', 'Goodbye']) AS x, [1, 2, 3] AS arr) ARRAY JOIN arr AS a;
diff --git a/tests/queries/0_stateless/00302_http_compression.sh b/tests/queries/0_stateless/00302_http_compression.sh
index 829475e8602..cfa9a930f09 100755
--- a/tests/queries/0_stateless/00302_http_compression.sh
+++ b/tests/queries/0_stateless/00302_http_compression.sh
@@ -4,6 +4,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh
+if ! command -v gzip &> /dev/null; then echo "gzip not found" 1>&2; exit 1; fi
+if ! command -v brotli &> /dev/null; then echo "brotli not found" 1>&2; exit 1; fi
+if ! command -v xz &> /dev/null; then echo "xz not found" 1>&2; exit 1; fi
+if !
command -v zstd &> /dev/null; then echo "zstd not found" 1>&2; exit 1; fi + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10'; ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=0" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10'; ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&enable_http_compression=1" -H 'Accept-Encoding: gzip' -d 'SELECT number FROM system.numbers LIMIT 10' | gzip -d; diff --git a/tests/queries/0_stateless/00341_squashing_insert_select2.sql b/tests/queries/0_stateless/00341_squashing_insert_select2.sql index 469fdaaa64a..3eb5a2682e0 100644 --- a/tests/queries/0_stateless/00341_squashing_insert_select2.sql +++ b/tests/queries/0_stateless/00341_squashing_insert_select2.sql @@ -1,5 +1,5 @@ DROP TABLE IF EXISTS numbers_squashed; -CREATE TABLE numbers_squashed (number UInt8) ENGINE = Memory; +CREATE TABLE numbers_squashed (number UInt8) ENGINE = StripeLog; SET min_insert_block_size_rows = 100; SET min_insert_block_size_bytes = 0; diff --git a/tests/queries/0_stateless/00474_readonly_settings.sh b/tests/queries/0_stateless/00474_readonly_settings.sh index 0edde9f12ed..0887ecfa14e 100755 --- a/tests/queries/0_stateless/00474_readonly_settings.sh +++ b/tests/queries/0_stateless/00474_readonly_settings.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +unset CLICKHOUSE_LOG_COMMENT + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/00502_sum_map.reference b/tests/queries/0_stateless/00502_sum_map.reference index 0002c43945a..c38fb2ec7d6 100644 --- a/tests/queries/0_stateless/00502_sum_map.reference +++ b/tests/queries/0_stateless/00502_sum_map.reference @@ -22,3 +22,5 @@ ([1.01],[1]) (['a','b'],[1,2]) (['a','ab','abc'],[3,2,1]) +([1,2,3,4,5,6,7,8],[1.00000,2.00000,6.00000,8.00000,10.00000,12.00000,7.00000,8.00000]) +([1,2,3,4,5,6,7,8],[1.00000,2.00000,6.00000,8.00000,10.00000,12.00000,7.00000,8.00000]) diff --git a/tests/queries/0_stateless/00502_sum_map.sql b/tests/queries/0_stateless/00502_sum_map.sql index 021aaf3cd3b..51007a9c78a 100644 --- a/tests/queries/0_stateless/00502_sum_map.sql +++ b/tests/queries/0_stateless/00502_sum_map.sql @@ -38,3 +38,19 @@ select sumMap(val, cnt) from ( SELECT [ CAST(1.01, 'Decimal(10,2)') ] as val, [1 select sumMap(val, cnt) from ( SELECT [ CAST('a', 'FixedString(1)'), CAST('b', 'FixedString(1)' ) ] as val, [1, 2] as cnt ); select sumMap(val, cnt) from ( SELECT [ CAST('abc', 'String'), CAST('ab', 'String'), CAST('a', 'String') ] as val, [1, 2, 3] as cnt ); + +DROP TABLE IF EXISTS sum_map_decimal; + +CREATE TABLE sum_map_decimal( + statusMap Nested( + goal_id UInt16, + revenue Decimal32(5) + ) +) ENGINE = Log; + +INSERT INTO sum_map_decimal VALUES ([1, 2, 3], [1.0, 2.0, 3.0]), ([3, 4, 5], [3.0, 4.0, 5.0]), ([4, 5, 6], [4.0, 5.0, 6.0]), ([6, 7, 8], [6.0, 7.0, 8.0]); + +SELECT sumMap(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal; +SELECT sumMapWithOverflow(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal; + +DROP TABLE sum_map_decimal; diff --git a/tests/queries/0_stateless/00506_union_distributed.sql b/tests/queries/0_stateless/00506_union_distributed.sql index 0bd4dd43ac9..3f631b8da56 100644 --- a/tests/queries/0_stateless/00506_union_distributed.sql +++ b/tests/queries/0_stateless/00506_union_distributed.sql @@ -1,6 +1,7 @@ - -- https://github.com/ClickHouse/ClickHouse/issues/1059 +SET insert_distributed_sync = 1; + DROP 
TABLE IF EXISTS union1; DROP TABLE IF EXISTS union2; DROP TABLE IF EXISTS union3; diff --git a/tests/queries/0_stateless/00515_gcd_lcm.sql b/tests/queries/0_stateless/00515_gcd_lcm.sql index c3bf3275bb8..67fab1c9d59 100644 --- a/tests/queries/0_stateless/00515_gcd_lcm.sql +++ b/tests/queries/0_stateless/00515_gcd_lcm.sql @@ -24,18 +24,18 @@ select lcm(2147483647, 2147483646); select lcm(4611686011984936962, 2147483647); select lcm(-2147483648, 1); -- test gcd float -select gcd(1280.1, 1024.1); -- { serverError 48 } -select gcd(11.1, 121.1); -- { serverError 48 } -select gcd(-256.1, 64.1); -- { serverError 48 } -select gcd(1.1, 1.1); -- { serverError 48 } -select gcd(4.1, 2.1); -- { serverError 48 } -select gcd(15.1, 49.1); -- { serverError 48 } -select gcd(255.1, 254.1); -- { serverError 48 } +select gcd(1280.1, 1024.1); -- { serverError 43 } +select gcd(11.1, 121.1); -- { serverError 43 } +select gcd(-256.1, 64.1); -- { serverError 43 } +select gcd(1.1, 1.1); -- { serverError 43 } +select gcd(4.1, 2.1); -- { serverError 43 } +select gcd(15.1, 49.1); -- { serverError 43 } +select gcd(255.1, 254.1); -- { serverError 43 } -- test lcm float -select lcm(1280.1, 1024.1); -- { serverError 48 } -select lcm(11.1, 121.1); -- { serverError 48 } -select lcm(-256.1, 64.1); -- { serverError 48 } -select lcm(1.1, 1.1); -- { serverError 48 } -select lcm(4.1, 2.1); -- { serverError 48 } -select lcm(15.1, 49.1); -- { serverError 48 } -select lcm(255.1, 254.1); -- { serverError 48 } +select lcm(1280.1, 1024.1); -- { serverError 43 } +select lcm(11.1, 121.1); -- { serverError 43 } +select lcm(-256.1, 64.1); -- { serverError 43 } +select lcm(1.1, 1.1); -- { serverError 43 } +select lcm(4.1, 2.1); -- { serverError 43 } +select lcm(15.1, 49.1); -- { serverError 43 } +select lcm(255.1, 254.1); -- { serverError 43 } diff --git a/tests/queries/0_stateless/00597_push_down_predicate.reference b/tests/queries/0_stateless/00597_push_down_predicate.reference index cea533d6ccb..bd1c4791df4 100644 --- a/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -114,7 +114,8 @@ FROM ( SELECT 1 AS id, - identity(cast(1, \'UInt8\')) AS subquery + identity(CAST(1, \'UInt8\')) AS subquery + WHERE subquery = 1 ) WHERE subquery = 1 1 1 diff --git a/tests/queries/0_stateless/00597_push_down_predicate.sql b/tests/queries/0_stateless/00597_push_down_predicate.sql index ea01bba9f4d..ec306ac6792 100644 --- a/tests/queries/0_stateless/00597_push_down_predicate.sql +++ b/tests/queries/0_stateless/00597_push_down_predicate.sql @@ -8,6 +8,8 @@ DROP TABLE IF EXISTS test_view_00597; CREATE TABLE test_00597(date Date, id Int8, name String, value Int64) ENGINE = MergeTree(date, (id, date), 8192); CREATE VIEW test_view_00597 AS SELECT * FROM test_00597; +SELECT * FROM (SELECT floor(floor(1, floor(NULL), id = 257), floor(floor(floor(floor(NULL), '10485.76', '9223372036854775807', NULL), floor(10, floor(65535, NULL), 100.0000991821289), NULL)), '2.56'), b.* FROM (SELECT floor(floor(floor(floor(NULL), 1000.0001220703125))), * FROM test_00597) AS b) WHERE id = 257; + INSERT INTO test_00597 VALUES('2000-01-01', 1, 'test string 1', 1); INSERT INTO test_00597 VALUES('2000-01-01', 2, 'test string 2', 2); diff --git a/tests/queries/0_stateless/00642_cast.reference b/tests/queries/0_stateless/00642_cast.reference index 3d5572932fb..7f5333f590e 100644 --- a/tests/queries/0_stateless/00642_cast.reference +++ b/tests/queries/0_stateless/00642_cast.reference @@ -10,11 +10,11 @@ 
hello CREATE TABLE default.cast ( `x` UInt8, - `e` Enum8('hello' = 1, 'world' = 2) DEFAULT cast(x, 'Enum8(\'hello\' = 1, \'world\' = 2)') + `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)') ) ENGINE = MergeTree ORDER BY e SETTINGS index_granularity = 8192 x UInt8 -e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT cast(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') +e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') 1 hello diff --git a/tests/queries/0_stateless/00643_cast_zookeeper.reference b/tests/queries/0_stateless/00643_cast_zookeeper.reference index 658233be742..9123463de1a 100644 --- a/tests/queries/0_stateless/00643_cast_zookeeper.reference +++ b/tests/queries/0_stateless/00643_cast_zookeeper.reference @@ -1,12 +1,12 @@ CREATE TABLE default.cast1 ( `x` UInt8, - `e` Enum8('hello' = 1, 'world' = 2) DEFAULT cast(x, 'Enum8(\'hello\' = 1, \'world\' = 2)') + `e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)') ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00643/cast', 'r1') ORDER BY e SETTINGS index_granularity = 8192 x UInt8 -e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT cast(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') +e Enum8(\'hello\' = 1, \'world\' = 2) DEFAULT CAST(x, \'Enum8(\\\'hello\\\' = 1, \\\'world\\\' = 2)\') 1 hello 1 hello diff --git a/tests/queries/0_stateless/00643_cast_zookeeper.sql b/tests/queries/0_stateless/00643_cast_zookeeper.sql index c52d44bd88b..c9760f00ca7 100644 --- a/tests/queries/0_stateless/00643_cast_zookeeper.sql +++ b/tests/queries/0_stateless/00643_cast_zookeeper.sql @@ -1,3 +1,5 @@ +SET database_atomic_wait_for_drop_and_detach_synchronously=1; + DROP TABLE IF EXISTS cast1; DROP TABLE IF EXISTS cast2; diff --git a/tests/queries/0_stateless/00688_low_cardinality_syntax.reference b/tests/queries/0_stateless/00688_low_cardinality_syntax.reference index 035402c889d..ca27069a7df 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_syntax.reference +++ b/tests/queries/0_stateless/00688_low_cardinality_syntax.reference @@ -18,3 +18,6 @@ c d cb db +- +61f0c404-5cb3-11e7-907b-a6006ad3dba0 61f0c404-5cb3-11e7-907b-a6006ad3dba0 61f0c404-5cb3-11e7-907b-a6006ad3dba0 +\N \N \N diff --git a/tests/queries/0_stateless/00688_low_cardinality_syntax.sql b/tests/queries/0_stateless/00688_low_cardinality_syntax.sql index 98d7b7f5f8a..3ca7b482b84 100644 --- a/tests/queries/0_stateless/00688_low_cardinality_syntax.sql +++ b/tests/queries/0_stateless/00688_low_cardinality_syntax.sql @@ -71,3 +71,12 @@ select (toLowCardinality('a') as val) || 'b' group by val; select toLowCardinality(z) as val from (select arrayJoin(['c', 'd']) as z) group by val; select (toLowCardinality(z) as val) || 'b' from (select arrayJoin(['c', 'd']) as z) group by val; +select '-'; +drop table if exists lc_str_uuid; +create table lc_str_uuid(str1 String, str2 LowCardinality(String), str3 StringWithDictionary) ENGINE=Memory; +select toUUID(str1), toUUID(str2), toUUID(str3) from lc_str_uuid; +select toUUID(str1, '', NULL), toUUID(str2, '', NULL), toUUID(str3, '', NULL) from lc_str_uuid; +insert into lc_str_uuid values ('61f0c404-5cb3-11e7-907b-a6006ad3dba0', '61f0c404-5cb3-11e7-907b-a6006ad3dba0', '61f0c404-5cb3-11e7-907b-a6006ad3dba0'); +select toUUID(str1), toUUID(str2), toUUID(str3) from lc_str_uuid; +select toUUID(str1, '', NULL), toUUID(str2, '', NULL), toUUID(str3, '', NULL) from lc_str_uuid; +drop table if exists lc_str_uuid; diff --git 
a/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql b/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql index b23d8a566c8..d4d260ee92e 100644 --- a/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql +++ b/tests/queries/0_stateless/00717_low_cardinaliry_distributed_group_by.sql @@ -1,4 +1,6 @@ +set insert_distributed_sync = 1; set allow_suspicious_low_cardinality_types = 1; + DROP TABLE IF EXISTS test_low_null_float; DROP TABLE IF EXISTS dist_00717; diff --git a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference index 5060b5253fe..e0922ad435d 100644 --- a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference +++ b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.reference @@ -49,3 +49,19 @@ FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF:FFFF is ipv6 string: 1 ::ffff:127.0.0.1 is ipv6 string: 1 ::ffff:8.8.8.8 is ipv6 string: 1 2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D is ipv6 string: 1 +::ffff:0.0.0.0 +::ffff:127.0.0.1 +::ffff:127.0.0.1 +::ffff:127.0.0.0 +::ffff:127.0.0.1 +::ffff:127.0.0.2 +::ffff:127.0.0.3 +::ffff:127.0.0.4 +::ffff:127.0.0.5 +::ffff:127.0.0.6 +::ffff:127.0.0.7 +::ffff:127.0.0.8 +::ffff:127.0.0.9 +::ffff:127.0.0.10 +::ffff:127.0.0.11 +::ffff:127.0.0.12 diff --git a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql index 099dc20762e..5815afb1605 100644 --- a/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql +++ b/tests/queries/0_stateless/00725_ipv4_ipv6_domains.sql @@ -84,3 +84,9 @@ SELECT '::ffff:127.0.0.1 is ipv6 string: ', isIPv6String( SELECT '::ffff:8.8.8.8 is ipv6 string: ', isIPv6String('::ffff:8.8.8.8'); SELECT '2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D is ipv6 string: ', isIPv6String('2001:0DB8:AC10:FE01:FEED:BABE:CAFE:F00D'); +-- IPV6 functions parse IPv4 addresses. + +SELECT toIPv6('0.0.0.0'); +SELECT toIPv6('127.0.0.1'); +SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0); +SELECT toIPv6('127.0.0.' || toString(number)) FROM numbers(13); diff --git a/tests/queries/0_stateless/00738_lock_for_inner_table.sh b/tests/queries/0_stateless/00738_lock_for_inner_table.sh index 9540d566ac3..d19288f65d8 100755 --- a/tests/queries/0_stateless/00738_lock_for_inner_table.sh +++ b/tests/queries/0_stateless/00738_lock_for_inner_table.sh @@ -5,21 +5,37 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -echo "DROP TABLE IF EXISTS tab_00738; -DROP TABLE IF EXISTS mv; -CREATE TABLE tab_00738(a Int) ENGINE = Log; -CREATE MATERIALIZED VIEW mv UUID '00000738-1000-4000-8000-000000000001' ENGINE = Log AS SELECT a FROM tab_00738;" | ${CLICKHOUSE_CLIENT} -n +# there are some issues with Atomic database, let's generate it uniq +# otherwise flaky check will not pass. 
+uuid=$(${CLICKHOUSE_CLIENT} --query "SELECT reinterpretAsUUID(currentDatabase())") -${CLICKHOUSE_CLIENT} --query_id test_00738 --query "INSERT INTO tab_00738 SELECT number FROM numbers(10000000)" & +echo "DROP TABLE IF EXISTS tab_00738 SYNC; +DROP TABLE IF EXISTS mv SYNC; +-- create table with fsync and 20 partitions for slower INSERT +-- (since increasing number of records will make it significantly slower in debug build, but not in release) +CREATE TABLE tab_00738(a Int) ENGINE = MergeTree() ORDER BY a PARTITION BY a%20 SETTINGS fsync_after_insert=1; +CREATE MATERIALIZED VIEW mv UUID '$uuid' ENGINE = Log AS SELECT a FROM tab_00738;" | ${CLICKHOUSE_CLIENT} -n + +${CLICKHOUSE_CLIENT} --query_id insert_$CLICKHOUSE_DATABASE --query "INSERT INTO tab_00738 SELECT number FROM numbers(10000000)" & function drop() { - ${CLICKHOUSE_CLIENT} --query "DROP TABLE \`.inner_id.00000738-1000-4000-8000-000000000001\`" -n + ${CLICKHOUSE_CLIENT} --query "DROP TABLE \`.inner_id.$uuid\`" -n } function wait_for_query_to_start() { - while [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'test_00738'") == 0 ]]; do sleep 0.001; done + while [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'insert_$CLICKHOUSE_DATABASE'") == 0 ]]; do sleep 0.001; done + + # The query is already started, but there is no guarantee that it locks the underlying table already. + # Wait until PushingToViewsBlockOutputStream will acquire the lock of the underlying table for the INSERT query. + # (assume that 0.5 second is enough for this, but this is not 100% correct) + sleep 0.5 + + # query already finished, fail + if [[ $(${CLICKHOUSE_CLIENT} --query "SELECT count() FROM system.processes WHERE query_id = 'insert_$CLICKHOUSE_DATABASE'") == 0 ]]; then + exit 2 + fi } export -f wait_for_query_to_start diff --git a/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql b/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql index 15573d859bb..196dfd84c7f 100644 --- a/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql +++ b/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql @@ -1,3 +1,5 @@ +SET insert_distributed_sync = 1; + DROP TABLE IF EXISTS low_cardinality; DROP TABLE IF EXISTS low_cardinality_all; diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto new file mode 100644 index 00000000000..8673924c929 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.proto @@ -0,0 +1,14 @@ +syntax = "proto3"; + +message ABC +{ + message nested + { + message nested + { + repeated int32 c = 1; + } + repeated nested b = 1; + } + repeated nested a = 1; +} \ No newline at end of file diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.reference b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.reference new file mode 100644 index 00000000000..69e7d5e1da8 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.reference @@ -0,0 +1,52 @@ +[[],[[]],[[1]],[[2,3],[4]]] +[[[5,6,7]],[[8,9,10]]] + +Binary representation: +00000000 1a 0a 00 0a 02 0a 00 0a 05 0a 03 0a 01 01 0a 0b |................| +00000010 0a 04 0a 02 02 03 0a 03 0a 01 04 12 0a 07 0a 05 |................| +00000020 0a 03 05 06 07 0a 07 0a 05 0a 03 08 09 0a |..............| +0000002e + +MESSAGE #1 AT 0x00000001 +a { +} +a { + b { + } +} +a { + b { + c: 1 + } +} +a { + b { + c: 
2 + c: 3 + } + b { + c: 4 + } +} +MESSAGE #2 AT 0x0000001C +a { + b { + c: 5 + c: 6 + c: 7 + } +} +a { + b { + c: 8 + c: 9 + c: 10 + } +} + +Binary representation is as expected + +[[],[[]],[[1]],[[2,3],[4]]] +[[[5,6,7]],[[8,9,10]]] +[[],[[]],[[1]],[[2,3],[4]]] +[[[5,6,7]],[[8,9,10]]] diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh new file mode 100755 index 00000000000..903217ca939 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_array_3dim.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. +$CLICKHOUSE_CLIENT --multiquery <<'EOF' +DROP TABLE IF EXISTS array_3dim_protobuf_00825; + +CREATE TABLE array_3dim_protobuf_00825 +( + `a_b_c` Array(Array(Array(Int32))) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO array_3dim_protobuf_00825 VALUES ([[], [[]], [[1]], [[2,3],[4]]]), ([[[5, 6, 7]], [[8, 9, 10]]]); + +SELECT * FROM array_3dim_protobuf_00825; +EOF + +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_3dim.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_3dim:ABC'" > "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_3dim:ABC" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO array_3dim_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_3dim:ABC'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM array_3dim_protobuf_00825" + +rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto new file mode 100644 index 00000000000..8f84164da2a --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.proto @@ -0,0 +1,9 @@ +syntax = "proto3"; + +message AA { + message nested_array { + repeated double c = 2; + } + string a = 1; + repeated nested_array b = 2; +} \ No newline at end of file diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.reference b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.reference new file mode 100644 index 00000000000..5ea6780a3ba --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.reference @@ -0,0 +1,41 @@ +one [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]] + +Binary representation: +00000000 6b 0a 03 6f 6e 65 12 1a 12 18 00 00 00 00 00 00 |k..one..........| +00000010 f0 3f 00 00 00 00 00 00 00 40 00 00 00 00 00 00 |.?.......@......| +00000020 08 40 12 12 12 10 00 00 00 00 00 00 e0 3f 00 00 |.@...........?..| +00000030 00 00 00 00 d0 3f 12 00 12 12 12 10 00 00 00 00 |.....?..........| +00000040 00 00 10 40 00 00 00 00 00 00 14 40 12 12 12 10 |...@.......@....| +00000050 00 00 00 00 00 00 c0 3f 00 00 00 00 00 00 b0 3f |.......?.......?| +00000060 12 0a 12 08 00 00 00 00 00 00 18 40 |...........@| +0000006c + +MESSAGE #1 AT 0x00000001 +a: "one" +b { + c: 1 + c: 2 + c: 3 +} +b { + c: 0.5 + c: 0.25 +} +b { +} +b { + c: 4 + c: 5 +} +b { + c: 0.125 + c: 
0.0625 +} +b { + c: 6 +} + +Binary representation is as expected + +one [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]] +one [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]] diff --git a/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh new file mode 100755 index 00000000000..0b386723091 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_array_of_arrays.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# https://github.com/ClickHouse/ClickHouse/issues/9069 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. +$CLICKHOUSE_CLIENT --multiquery <<'EOF' +CREATE TABLE array_of_arrays_protobuf_00825 +( + `a` String, + `b` Nested ( + `c` Array(Float64) + ) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO array_of_arrays_protobuf_00825 VALUES ('one', [[1,2,3],[0.5,0.25],[],[4,5],[0.125,0.0625],[6]]); + +SELECT * FROM array_of_arrays_protobuf_00825; +EOF + +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_array_of_arrays.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_array_of_arrays:AA'" > "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_array_of_arrays:AA" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO array_of_arrays_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_array_of_arrays:AA'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM array_of_arrays_protobuf_00825" + +rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto new file mode 100644 index 00000000000..ba558dbbadb --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.proto @@ -0,0 +1,13 @@ +syntax = "proto3"; + +message Message +{ + enum Enum + { + FIRST = 0; + SECOND = 1; + TEN = 10; + HUNDRED = 100; + }; + Enum x = 1; +}; \ No newline at end of file diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.reference b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.reference new file mode 100644 index 00000000000..ef8059bac28 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.reference @@ -0,0 +1,31 @@ +Second +Third +First +First +Second + +Binary representation: +00000000 02 08 01 02 08 64 00 00 02 08 01 |.....d.....| +0000000b + +MESSAGE #1 AT 0x00000001 +x: SECOND +MESSAGE #2 AT 0x00000004 +x: HUNDRED +MESSAGE #3 AT 0x00000007 +MESSAGE #4 AT 0x00000008 +MESSAGE #5 AT 0x00000009 +x: SECOND + +Binary representation is as expected + +Second +Third +First +First +Second +Second +Third +First +First +Second diff --git a/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh new file mode 100755 index 00000000000..cbb387a62a5 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_enum_mapping.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +# https://github.com/ClickHouse/ClickHouse/issues/7438 + +CURDIR=$(cd 
"$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. +$CLICKHOUSE_CLIENT --multiquery <<'EOF' +DROP TABLE IF EXISTS enum_mapping_protobuf_00825; + +CREATE TABLE enum_mapping_protobuf_00825 +( + x Enum16('First'=-100, 'Second'=0, 'Third'=100) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO enum_mapping_protobuf_00825 VALUES ('Second'), ('Third'), ('First'), ('First'), ('Second'); + +SELECT * FROM enum_mapping_protobuf_00825; +EOF + +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_enum_mapping.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_enum_mapping:Message'" > "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_enum_mapping:Message" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO enum_mapping_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_enum_mapping:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM enum_mapping_protobuf_00825" + +rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.proto b/tests/queries/0_stateless/00825_protobuf_format_map.proto new file mode 100644 index 00000000000..561b409b733 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_map.proto @@ -0,0 +1,5 @@ +syntax = "proto3"; + +message Message { + map a = 1; +}; diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.reference b/tests/queries/0_stateless/00825_protobuf_format_map.reference new file mode 100644 index 00000000000..e3f17cb1095 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_map.reference @@ -0,0 +1,19 @@ +{'x':5,'y':7} +{'z':11} +{'temp':0} +{'':0} + +Binary representation: +00000000 0e 0a 05 0a 01 78 10 05 0a 05 0a 01 79 10 07 07 |.....x......y...| +00000010 0a 05 0a 01 7a 10 0b 0a 0a 08 0a 04 74 65 6d 70 |....z.......temp| +00000020 10 00 06 0a 04 0a 00 10 00 |.........| +00000029 + +{'x':5,'y':7} +{'z':11} +{'temp':0} +{'':0} +{'x':5,'y':7} +{'z':11} +{'temp':0} +{'':0} diff --git a/tests/queries/0_stateless/00825_protobuf_format_map.sh b/tests/queries/0_stateless/00825_protobuf_format_map.sh new file mode 100755 index 00000000000..5df25c41750 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_map.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +# https://github.com/ClickHouse/ClickHouse/issues/6497 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. 
+$CLICKHOUSE_CLIENT --multiquery <<'EOF' +SET allow_experimental_map_type = 1; + +DROP TABLE IF EXISTS map_00825; + +CREATE TABLE map_00825 +( + a Map(String, UInt32) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO map_00825 VALUES ({'x':5, 'y':7}), ({'z':11}), ({'temp':0}), ({'':0}); + +SELECT * FROM map_00825; +EOF + +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_map.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_map:Message'" > "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +echo "Binary representation:" +hexdump -C $BINARY_FILE_PATH + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO map_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_map:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM map_00825" + +rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto new file mode 100644 index 00000000000..052741f504b --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.proto @@ -0,0 +1,10 @@ +syntax = "proto3"; + +message Repeated { + string foo = 1; + int64 bar = 2; +} + +message Message { + repeated Repeated messages = 1; +}; \ No newline at end of file diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.reference b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.reference new file mode 100644 index 00000000000..6cdd56a5b7f --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.reference @@ -0,0 +1,25 @@ +['1'] [0] +['1',''] [0,1] + +Binary representation: +00000000 05 0a 03 0a 01 31 09 0a 03 0a 01 31 0a 02 10 01 |.....1.....1....| +00000010 + +MESSAGE #1 AT 0x00000001 +messages { + foo: "1" +} +MESSAGE #2 AT 0x00000007 +messages { + foo: "1" +} +messages { + bar: 1 +} + +Binary representation is as expected + +['1'] [0] +['1',''] [0,1] +['1'] [0] +['1',''] [0,1] diff --git a/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh new file mode 100755 index 00000000000..58ded92f2c1 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_nested_optional.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +# https://github.com/ClickHouse/ClickHouse/issues/6497 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. 
+$CLICKHOUSE_CLIENT --multiquery <<'EOF' +DROP TABLE IF EXISTS nested_optional_protobuf_00825; + +CREATE TABLE nested_optional_protobuf_00825 +( + messages Nested + ( + foo String, + bar Int64 + ) +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO nested_optional_protobuf_00825 VALUES (['1'], [0]), (['1', ''], [0, 1]); + +SELECT * FROM nested_optional_protobuf_00825; +EOF + +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_nested_optional.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_nested_optional:Message'" > "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_nested_optional:Message" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO nested_optional_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_nested_optional:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM nested_optional_protobuf_00825" + +rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00825_protobuf_format_table_default.proto b/tests/queries/0_stateless/00825_protobuf_format_table_default.proto new file mode 100644 index 00000000000..08e6049ffe0 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.proto @@ -0,0 +1,6 @@ +syntax = "proto3"; + +message Message { + sint32 x = 1; + sint32 z = 2; +}; \ No newline at end of file diff --git a/tests/queries/0_stateless/00825_protobuf_format_table_default.reference b/tests/queries/0_stateless/00825_protobuf_format_table_default.reference new file mode 100644 index 00000000000..5472f3bfa14 --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.reference @@ -0,0 +1,37 @@ +0 0 0 +2 4 8 +3 9 27 +5 25 125 +101 102 103 + +Binary representation: +00000000 00 04 08 04 10 10 04 08 06 10 36 05 08 0a 10 fa |..........6.....| +00000010 01 06 08 ca 01 10 ce 01 |........| +00000018 + +MESSAGE #1 AT 0x00000001 +MESSAGE #2 AT 0x00000002 +x: 2 +z: 8 +MESSAGE #3 AT 0x00000007 +x: 3 +z: 27 +MESSAGE #4 AT 0x0000000C +x: 5 +z: 125 +MESSAGE #5 AT 0x00000012 +x: 101 +z: 103 + +Binary representation is as expected + +0 0 0 +0 0 0 +2 4 8 +2 4 8 +3 9 27 +3 9 27 +5 25 125 +5 25 125 +101 102 103 +101 10201 103 diff --git a/tests/queries/0_stateless/00825_protobuf_format_table_default.sh b/tests/queries/0_stateless/00825_protobuf_format_table_default.sh new file mode 100755 index 00000000000..97f7769269a --- /dev/null +++ b/tests/queries/0_stateless/00825_protobuf_format_table_default.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +set -eo pipefail + +# Run the client. 
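+# Create a table where y and z have DEFAULT expressions; the Protobuf schema carries only x and z, so y is recomputed from its DEFAULT on re-import.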
+$CLICKHOUSE_CLIENT --multiquery <<'EOF' +DROP TABLE IF EXISTS table_default_protobuf_00825; + +CREATE TABLE table_default_protobuf_00825 +( + x Int64, + y Int64 DEFAULT x * x, + z Int64 DEFAULT x * x * x +) ENGINE = MergeTree ORDER BY tuple(); + +INSERT INTO table_default_protobuf_00825 (x) VALUES (0), (2), (3), (5); +INSERT INTO table_default_protobuf_00825 VALUES (101, 102, 103); + +SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z; +EOF + +BINARY_FILE_PATH=$(mktemp "$CURDIR/00825_protobuf_format_table_default.XXXXXX.binary") +$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z FORMAT Protobuf SETTINGS format_schema = '$CURDIR/00825_protobuf_format_table_default:Message'" > "$BINARY_FILE_PATH" + +# Check the output in the protobuf format +echo +$CURDIR/helpers/protobuf_length_delimited_encoder.py --decode_and_check --format_schema "$CURDIR/00825_protobuf_format_table_default:Message" --input "$BINARY_FILE_PATH" + +# Check the input in the protobuf format (now the table contains the same data twice). +echo +$CLICKHOUSE_CLIENT --query "INSERT INTO table_default_protobuf_00825 FORMAT Protobuf SETTINGS format_schema='$CURDIR/00825_protobuf_format_table_default:Message'" < "$BINARY_FILE_PATH" +$CLICKHOUSE_CLIENT --query "SELECT * FROM table_default_protobuf_00825 ORDER BY x,y,z" + +rm "$BINARY_FILE_PATH" diff --git a/tests/queries/0_stateless/00826_cross_to_inner_join.reference b/tests/queries/0_stateless/00826_cross_to_inner_join.reference index e7c8d6b1ea9..84867de2849 100644 --- a/tests/queries/0_stateless/00826_cross_to_inner_join.reference +++ b/tests/queries/0_stateless/00826_cross_to_inner_join.reference @@ -95,7 +95,7 @@ SELECT t2_00826.a, t2_00826.b FROM t1_00826 -ALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b) +ALL INNER JOIN t2_00826 ON (((a = t2_00826.a) AND (a = t2_00826.a)) AND (a = t2_00826.a)) AND (b = t2_00826.b) WHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b))) cross split conjunction SELECT diff --git a/tests/queries/0_stateless/00849_multiple_comma_join_2.reference b/tests/queries/0_stateless/00849_multiple_comma_join_2.reference index fc39ef13935..4db65b0b795 100644 --- a/tests/queries/0_stateless/00849_multiple_comma_join_2.reference +++ b/tests/queries/0_stateless/00849_multiple_comma_join_2.reference @@ -127,7 +127,7 @@ FROM ) AS `--.s` CROSS JOIN t3 ) AS `--.s` -ALL INNER JOIN t4 ON (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`) +ALL INNER JOIN t4 ON ((a = `--t1.a`) AND (a = `--t2.a`)) AND (a = `--t3.a`) WHERE (a = `--t1.a`) AND (a = `--t2.a`) AND (a = `--t3.a`) SELECT `--t1.a` AS `t1.a` FROM diff --git a/tests/queries/0_stateless/00878_join_unexpected_results.reference b/tests/queries/0_stateless/00878_join_unexpected_results.reference index a389cb47a96..65fcbc257ca 100644 --- a/tests/queries/0_stateless/00878_join_unexpected_results.reference +++ b/tests/queries/0_stateless/00878_join_unexpected_results.reference @@ -23,6 +23,8 @@ join_use_nulls = 1 - \N \N - +1 1 \N \N +2 2 \N \N - 1 1 1 1 2 2 \N \N @@ -49,6 +51,8 @@ join_use_nulls = 0 - - - +1 1 0 0 +2 2 0 0 - 1 1 1 1 2 2 0 0 diff --git a/tests/queries/0_stateless/00878_join_unexpected_results.sql b/tests/queries/0_stateless/00878_join_unexpected_results.sql index 0aef5208b26..6f6cd6e6479 100644 --- a/tests/queries/0_stateless/00878_join_unexpected_results.sql +++ b/tests/queries/0_stateless/00878_join_unexpected_results.sql @@ -30,11 +30,11 @@ select * from t left 
outer join s on (t.a=s.a and t.b=s.b) where s.a is null; select '-'; select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; select '-'; -select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; -- {serverError 403 } +select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; select '-'; select t.*, s.* from t left join s on (s.a=t.a) order by t.a; select '-'; -select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; -- {serverError 403 } +select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; select 'join_use_nulls = 0'; set join_use_nulls = 0; @@ -58,11 +58,11 @@ select '-'; select '-'; -- select s.* from t left outer join s on (t.a=s.a and t.b=s.b) where s.a is null; -- TODO select '-'; -select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; -- {serverError 403 } +select t.*, s.* from t left join s on (s.a=t.a and t.b=s.b and t.a=toInt64(2)) order by t.a; select '-'; select t.*, s.* from t left join s on (s.a=t.a) order by t.a; select '-'; -select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; -- {serverError 403 } +select t.*, s.* from t left join s on (t.b=toInt64(2) and s.a=t.a) where s.b=2; drop table t; drop table s; diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.reference b/tests/queries/0_stateless/00945_bloom_filter_index.reference index 184aafdd568..c0c2254648e 100644 --- a/tests/queries/0_stateless/00945_bloom_filter_index.reference +++ b/tests/queries/0_stateless/00945_bloom_filter_index.reference @@ -211,6 +211,14 @@ 2 1 1 +2 +2 +2 +2 +1 +2 +1 +2 1 value1 1 value2 2 value3 diff --git a/tests/queries/0_stateless/00945_bloom_filter_index.sql b/tests/queries/0_stateless/00945_bloom_filter_index.sql index 82321a75c67..f45c4c04290 100644 --- a/tests/queries/0_stateless/00945_bloom_filter_index.sql +++ b/tests/queries/0_stateless/00945_bloom_filter_index.sql @@ -163,23 +163,23 @@ DROP TABLE IF EXISTS bloom_filter_lc_null_types_test; DROP TABLE IF EXISTS bloom_filter_array_lc_null_types_test; CREATE TABLE bloom_filter_array_lc_null_types_test ( - order_key Array(LowCardinality(Nullable((UInt64)))), + order_key Array(LowCardinality(Nullable(UInt64))), - i8 Array(LowCardinality(Nullable((Int8)))), - i16 Array(LowCardinality(Nullable((Int16)))), - i32 Array(LowCardinality(Nullable((Int32)))), - i64 Array(LowCardinality(Nullable((Int64)))), - u8 Array(LowCardinality(Nullable((UInt8)))), - u16 Array(LowCardinality(Nullable((UInt16)))), - u32 Array(LowCardinality(Nullable((UInt32)))), - u64 Array(LowCardinality(Nullable((UInt64)))), - f32 Array(LowCardinality(Nullable((Float32)))), - f64 Array(LowCardinality(Nullable((Float64)))), + i8 Array(LowCardinality(Nullable(Int8))), + i16 Array(LowCardinality(Nullable(Int16))), + i32 Array(LowCardinality(Nullable(Int32))), + i64 Array(LowCardinality(Nullable(Int64))), + u8 Array(LowCardinality(Nullable(UInt8))), + u16 Array(LowCardinality(Nullable(UInt16))), + u32 Array(LowCardinality(Nullable(UInt32))), + u64 Array(LowCardinality(Nullable(UInt64))), + f32 Array(LowCardinality(Nullable(Float32))), + f64 Array(LowCardinality(Nullable(Float64))), - date Array(LowCardinality(Nullable((Date)))), + date Array(LowCardinality(Nullable(Date))), date_time Array(LowCardinality(Nullable(DateTime('Europe/Moscow')))), - str Array(LowCardinality(Nullable((String)))), + str Array(LowCardinality(Nullable(String))), fixed_string 
Array(LowCardinality(Nullable(FixedString(5)))), INDEX idx (i8, i16, i32, i64, u8, u16, u32, u64, f32, f64, date, date_time, str, fixed_string) TYPE bloom_filter GRANULARITY 1) @@ -286,7 +286,7 @@ SELECT COUNT() FROM bloom_filter_array_lc_null_types_test WHERE has(fixed_string DROP TABLE IF EXISTS bloom_filter_array_lc_null_types_test; DROP TABLE IF EXISTS bloom_filter_array_offsets_lc_str; -CREATE TABLE bloom_filter_array_offsets_lc_str (order_key int, str Array(LowCardinality((String))), INDEX idx str TYPE bloom_filter(1.) GRANULARITY 1024) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 1024; +CREATE TABLE bloom_filter_array_offsets_lc_str (order_key int, str Array(LowCardinality(String)), INDEX idx str TYPE bloom_filter(1.) GRANULARITY 1024) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 1024; INSERT INTO bloom_filter_array_offsets_lc_str SELECT number AS i, if(i%2, ['value'], []) FROM system.numbers LIMIT 10000; SELECT count() FROM bloom_filter_array_offsets_lc_str WHERE has(str, 'value'); DROP TABLE IF EXISTS bloom_filter_array_offsets_lc_str; @@ -348,6 +348,16 @@ SELECT id FROM test_bf_indexOf WHERE 1 <= indexOf(ary, 'value1') ORDER BY id FOR SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') >= 2 ORDER BY id FORMAT TSV; SELECT id FROM test_bf_indexOf WHERE 2 <= indexOf(ary, 'value1') ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') = toDecimal32(0, 2) ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE toDecimal128(0, 2) = indexOf(ary, 'value1') ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') = '0' ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE '0' = indexOf(ary, 'value1') ORDER BY id FORMAT TSV; + +SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') > toDecimal32(0, 2) ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') < toDecimal128(1, 2) ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') > '0' ORDER BY id FORMAT TSV; +SELECT id FROM test_bf_indexOf WHERE indexOf(ary, 'value1') < '1' ORDER BY id FORMAT TSV; + SELECT id, ary[indexOf(ary, 'value1')] FROM test_bf_indexOf WHERE ary[indexOf(ary, 'value1')] = 'value1' ORDER BY id FORMAT TSV; SELECT id, ary[indexOf(ary, 'value2')] FROM test_bf_indexOf WHERE ary[indexOf(ary, 'value2')] = 'value2' ORDER BY id FORMAT TSV; SELECT id, ary[indexOf(ary, 'value3')] FROM test_bf_indexOf WHERE ary[indexOf(ary, 'value3')] = 'value3' ORDER BY id FORMAT TSV; diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh.py new file mode 100755 index 00000000000..27308548452 --- /dev/null +++ b/tests/queries/0_stateless/00962_live_view_periodic_refresh.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python3 +import os +import sys +import signal + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from client import client, prompt, end_of_block + +log = None +# uncomment the line below for debugging +#log=sys.stdout + +with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2: + client1.expect(prompt) + client2.expect(prompt) + + client1.send('SET allow_experimental_live_view = 1') + client1.expect(prompt) + client2.send('SET allow_experimental_live_view = 1') + client2.expect(prompt) + + client1.send('DROP TABLE IF EXISTS test.lv') + client1.expect(prompt) + 
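+    # Create a live view refreshed every second and watch it through several refreshes.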
client1.send("CREATE LIVE VIEW test.lv WITH REFRESH 1" + " AS SELECT value FROM system.events WHERE event = 'OSCPUVirtualTimeMicroseconds'") + client1.expect(prompt) + client1.send('WATCH test.lv FORMAT JSONEachRow') + client1.expect(r'"_version":' + end_of_block) + client1.expect(r'"_version":' + end_of_block) + client1.expect(r'"_version":' + end_of_block) + # send Ctrl-C + client1.send('\x03', eol='') + match = client1.expect('(%s)|([#\$] )' % prompt) + if match.groups()[1]: + client1.send(client1.command) + client1.expect(prompt) + client1.send('DROP TABLE test.lv') + client1.expect(prompt) + diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh.reference b/tests/queries/0_stateless/00962_live_view_periodic_refresh.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py new file mode 100755 index 00000000000..76b9980d1f5 --- /dev/null +++ b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +import os +import sys +import time +import signal + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from client import client, prompt, end_of_block + +log = None +# uncomment the line below for debugging +#log=sys.stdout + +with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2: + client1.expect(prompt) + client2.expect(prompt) + + client1.send('SET allow_experimental_live_view = 1') + client1.expect(prompt) + client2.send('SET allow_experimental_live_view = 1') + client2.expect(prompt) + + client1.send('DROP TABLE IF EXISTS test.lv') + client1.expect(prompt) + client1.send("CREATE LIVE VIEW test.lv WITH TIMEOUT 60 AND REFRESH 1" + " AS SELECT value FROM system.events WHERE event = 'OSCPUVirtualTimeMicroseconds'") + client1.expect(prompt) + client1.send('WATCH test.lv FORMAT JSONEachRow') + client1.expect(r'"_version":' + end_of_block) + client1.expect(r'"_version":' + end_of_block) + client1.expect(r'"_version":' + end_of_block) + # send Ctrl-C + client1.send('\x03', eol='') + match = client1.expect('(%s)|([#\$] )' % prompt) + if match.groups()[1]: + client1.send(client1.command) + client1.expect(prompt) + # poll until live view table is dropped + start_time = time.time() + while True: + client1.send('SELECT * FROM test.lv FORMAT JSONEachRow') + client1.expect(prompt) + if 'Table test.lv doesn\'t exist' in client1.before: + break + if time.time() - start_time > 90: + break + # check table is dropped + client1.send('DROP TABLE test.lv') + client1.expect('Table test.lv doesn\'t exist') + client1.expect(prompt) diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.reference b/tests/queries/0_stateless/00962_live_view_periodic_refresh_and_timeout.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.py b/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.py new file mode 100755 index 00000000000..34d5db676f4 --- /dev/null +++ b/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python3 +import os +import sys +import signal + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from client import client, 
prompt, end_of_block + +log = None +# uncomment the line below for debugging +#log=sys.stdout + +with client(name='client1>', log=log) as client1, client(name='client2>', log=log) as client2: + client1.expect(prompt) + client2.expect(prompt) + + client1.send('SET allow_experimental_live_view = 1') + client1.expect(prompt) + client2.send('SET allow_experimental_live_view = 1') + client2.expect(prompt) + + client1.send('DROP TABLE IF EXISTS test.lv') + client1.expect(prompt) + client1.send('DROP TABLE IF EXISTS test.mt') + client1.expect(prompt) + client1.send('DROP DICTIONARY IF EXISTS test.dict') + client1.expect(prompt) + + client1.send("CREATE TABLE test.mt (a Int32, b Int32) Engine=MergeTree order by tuple()") + client1.expect(prompt) + client1.send("CREATE DICTIONARY test.dict(a Int32, b Int32) PRIMARY KEY a LAYOUT(FLAT()) " + \ + "SOURCE(CLICKHOUSE(db 'test' table 'mt')) LIFETIME(1)") + client1.expect(prompt) + client1.send("CREATE LIVE VIEW test.lv WITH REFRESH 1 AS SELECT * FROM test.dict") + client1.expect(prompt) + + client2.send("INSERT INTO test.mt VALUES (1,2)") + client2.expect(prompt) + + client1.send('WATCH test.lv FORMAT JSONEachRow') + client1.expect(r'"_version":"1"') + + client2.send("INSERT INTO test.mt VALUES (2,2)") + client2.expect(prompt) + client1.expect(r'"_version":"2"') + + client2.send("INSERT INTO test.mt VALUES (3,2)") + client2.expect(prompt) + client1.expect(r'"_version":"3"') + + # send Ctrl-C + client1.send('\x03', eol='') + match = client1.expect('(%s)|([#\$] )' % prompt) + if match.groups()[1]: + client1.send(client1.command) + client1.expect(prompt) + + client1.send('DROP TABLE IF EXISTS test.lv') + client1.expect(prompt) + client1.send('DROP DICTIONARY IF EXISTS test.dict') + client1.expect(prompt) + client1.send('DROP TABLE IF EXISTS test.mt') + client1.expect(prompt) + + + diff --git a/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.reference b/tests/queries/0_stateless/00962_live_view_periodic_refresh_dictionary.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql index 455fab694cd..3b562801f92 100644 --- a/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql +++ b/tests/queries/0_stateless/00967_insert_into_distributed_different_types.sql @@ -1,8 +1,12 @@ +set insert_distributed_sync=1; + DROP TABLE IF EXISTS dist_00967; DROP TABLE IF EXISTS underlying_00967; +-- To suppress "Structure does not match (...), implicit conversion will be done." message +SET send_logs_level='error'; + CREATE TABLE dist_00967 (key UInt64) Engine=Distributed('test_shard_localhost', currentDatabase(), underlying_00967); --- fails for TinyLog()/MergeTree()/...
but not for Memory() CREATE TABLE underlying_00967 (key Nullable(UInt64)) Engine=TinyLog(); INSERT INTO dist_00967 SELECT toUInt64(number) FROM system.numbers LIMIT 1; diff --git a/tests/queries/0_stateless/00979_live_view_watch_live_moving_avg.py b/tests/queries/0_stateless/00979_live_view_watch_live_moving_avg.py.disabled similarity index 100% rename from tests/queries/0_stateless/00979_live_view_watch_live_moving_avg.py rename to tests/queries/0_stateless/00979_live_view_watch_live_moving_avg.py.disabled diff --git a/tests/queries/0_stateless/01016_uniqCombined64.sql b/tests/queries/0_stateless/01016_uniqCombined64.sql index 4720b53d15e..acf8135760a 100644 --- a/tests/queries/0_stateless/01016_uniqCombined64.sql +++ b/tests/queries/0_stateless/01016_uniqCombined64.sql @@ -5,5 +5,5 @@ -- test is just to ensure that the result is different (and to document the -- outcome). -SELECT uniqCombined(number) FROM numbers(toUInt64(1e7)); -SELECT uniqCombined64(number) FROM numbers(toUInt64(1e7)); +SELECT uniqCombined(number) FROM numbers(1e7); +SELECT uniqCombined64(number) FROM numbers(1e7); diff --git a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql index bfcfec2b8ba..2ad1edae733 100644 --- a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql +++ b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql @@ -5,45 +5,45 @@ -- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements SELECT 'UInt32'; SET max_memory_usage = 4000000; -SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); -- { serverError 241 } +SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(8192 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 9830400; -SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); +SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(number % 8192) u FROM numbers(8192 * 100) GROUP BY k); -- HashTable for UInt64 (used until (1<<12) elements), hence 4096 elements SELECT 'UInt64'; SET max_memory_usage = 4000000; -SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); -- { serverError 241 } +SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(4096 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 9830400; -SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); +SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(4096 * 100) GROUP BY k); SELECT 'K=16'; -- HashTable for UInt32 (used until (1<<12) elements), hence 4096 elements SELECT 'UInt32'; SET max_memory_usage = 2000000; -SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(toUInt64(4096 * 100)) GROUP BY k); -- { serverError 241 } +SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 4915200; -SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(toUInt64(4096 * 100)) 
GROUP BY k); +SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(16)(number % 4096) u FROM numbers(4096 * 100) GROUP BY k); -- HashTable for UInt64 (used until (1<<11) elements), hence 2048 elements SELECT 'UInt64'; SET max_memory_usage = 2000000; -SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(toUInt64(2048 * 100)) GROUP BY k); -- { serverError 241 } +SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 4915200; -SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(toUInt64(2048 * 100)) GROUP BY k); +SELECT sum(u) FROM (SELECT intDiv(number, 2048) AS k, uniqCombined(16)(reinterpretAsString(number % 2048)) u FROM numbers(2048 * 100) GROUP BY k); SELECT 'K=18'; -- HashTable for UInt32 (used until (1<<14) elements), hence 16384 elements SELECT 'UInt32'; SET max_memory_usage = 8000000; -SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(toUInt64(16384 * 100)) GROUP BY k); -- { serverError 241 } +SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(16384 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 19660800; -SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(toUInt64(16384 * 100)) GROUP BY k); +SELECT sum(u) FROM (SELECT intDiv(number, 16384) AS k, uniqCombined(18)(number % 16384) u FROM numbers(16384 * 100) GROUP BY k); -- HashTable for UInt64 (used until (1<<13) elements), hence 8192 elements SELECT 'UInt64'; SET max_memory_usage = 8000000; -SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); -- { serverError 241 } +SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(8192 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 19660800; -SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(toUInt64(8192 * 100)) GROUP BY k); +SELECT sum(u) FROM (SELECT intDiv(number, 8192) AS k, uniqCombined(18)(reinterpretAsString(number % 8192)) u FROM numbers(8192 * 100) GROUP BY k); diff --git a/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh b/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh index bc13e44934a..025fe51e2a9 100755 --- a/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh +++ b/tests/queries/0_stateless/01018_ddl_dictionaries_concurrent_requrests.sh @@ -113,8 +113,8 @@ timeout $TIMEOUT bash -c thread7 2> /dev/null & wait $CLICKHOUSE_CLIENT -q "SELECT 'Still alive'" -$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY database_for_dict.dict1" -$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY database_for_dict.dict2" +$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY IF NOT EXISTS database_for_dict.dict1" +$CLICKHOUSE_CLIENT -q "ATTACH DICTIONARY IF NOT EXISTS database_for_dict.dict2" $CLICKHOUSE_CLIENT -n -q " DROP TABLE table_for_dict1; diff --git a/tests/queries/0_stateless/01023_materialized_view_query_context.sql b/tests/queries/0_stateless/01023_materialized_view_query_context.sql index 7ec8d8fd506..351379d8b14 100644 --- 
a/tests/queries/0_stateless/01023_materialized_view_query_context.sql +++ b/tests/queries/0_stateless/01023_materialized_view_query_context.sql @@ -1,5 +1,8 @@ -- Create dictionary, since dictGet*() uses DB::Context in executeImpl() -- (To cover scope of the Context in DB::PushingToViewsBlockOutputStream::process) + +set insert_distributed_sync=1; + DROP TABLE IF EXISTS mv; DROP DATABASE IF EXISTS dict_in_01023; CREATE DATABASE dict_in_01023; diff --git a/tests/queries/0_stateless/01029_early_constant_folding.reference b/tests/queries/0_stateless/01029_early_constant_folding.reference index 7e2f6c7ce76..8a2d7e6c61a 100644 --- a/tests/queries/0_stateless/01029_early_constant_folding.reference +++ b/tests/queries/0_stateless/01029_early_constant_folding.reference @@ -2,7 +2,7 @@ SELECT 1 WHERE 0 SELECT 1 SELECT 1 -WHERE 0 +WHERE (1 IN (0, 2)) AND (2 = (identity(CAST(2, \'UInt8\')) AS subquery)) SELECT 1 WHERE 1 IN ( ( diff --git a/tests/queries/0_stateless/01029_early_constant_folding.sql b/tests/queries/0_stateless/01029_early_constant_folding.sql index 428c3625295..6336b62e080 100644 --- a/tests/queries/0_stateless/01029_early_constant_folding.sql +++ b/tests/queries/0_stateless/01029_early_constant_folding.sql @@ -4,7 +4,7 @@ EXPLAIN SYNTAX SELECT 1 WHERE 1 = 0; EXPLAIN SYNTAX SELECT 1 WHERE 1 IN (0, 1, 2); -EXPLAIN SYNTAX SELECT 1 WHERE 1 IN (0, 2) AND 2 = (SELECT 2); +EXPLAIN SYNTAX SELECT 1 WHERE 1 IN (0, 2) AND 2 = ((SELECT 2) AS subquery); -- no constant folding diff --git a/tests/queries/0_stateless/01044_great_circle_angle.reference b/tests/queries/0_stateless/01044_great_circle_angle.reference index 60a616c7187..ebdeaa10067 100644 --- a/tests/queries/0_stateless/01044_great_circle_angle.reference +++ b/tests/queries/0_stateless/01044_great_circle_angle.reference @@ -17,11 +17,11 @@ ██████████▎ ████████████▍ ██████████████▍ -████████████████▌ +████████████████▍ ██████████████████▌ -████████████████████▋ -██████████████████████▋ -████████████████████████▋ +████████████████████▌ +██████████████████████▌ +████████████████████████▌ ██████████████████████████▌ ████████████████████████████▍ ██████████████████████████████▍ diff --git a/tests/queries/0_stateless/01046_materialized_view_with_join_over_distributed.sql b/tests/queries/0_stateless/01046_materialized_view_with_join_over_distributed.sql index 7aac720865d..318f48dc833 100644 --- a/tests/queries/0_stateless/01046_materialized_view_with_join_over_distributed.sql +++ b/tests/queries/0_stateless/01046_materialized_view_with_join_over_distributed.sql @@ -1,5 +1,7 @@ -- from https://github.com/ClickHouse/ClickHouse/issues/5142 +set insert_distributed_sync = 1; + DROP TABLE IF EXISTS t; DROP TABLE IF EXISTS t_d; DROP TABLE IF EXISTS t_v; diff --git a/tests/queries/0_stateless/01051_new_any_join_engine.reference b/tests/queries/0_stateless/01051_new_any_join_engine.reference index 635ae641a63..a20fea88656 100644 --- a/tests/queries/0_stateless/01051_new_any_join_engine.reference +++ b/tests/queries/0_stateless/01051_new_any_join_engine.reference @@ -29,3 +29,34 @@ anti left 3 a4 anti right 5 b6 +any left +0 a1 +1 a2 +2 a3 b1 +3 a4 +4 a5 b3 +any inner +2 a3 b1 +4 a5 b3 +any right +2 a3 b1 +2 a3 b2 +4 a5 b3 +4 a5 b4 +4 a5 b5 +5 b6 +semi left +2 a3 b1 +4 a5 b3 +semi right +2 a3 b1 +2 a3 b2 +4 a5 b3 +4 a5 b4 +4 a5 b5 +anti left +0 a1 +1 a2 +3 a4 +anti right +5 b6 diff --git a/tests/queries/0_stateless/01051_new_any_join_engine.sql b/tests/queries/0_stateless/01051_new_any_join_engine.sql index 8662d8532d4..a687a6494b5 100644 --- 
a/tests/queries/0_stateless/01051_new_any_join_engine.sql +++ b/tests/queries/0_stateless/01051_new_any_join_engine.sql @@ -57,6 +57,29 @@ SELECT * FROM t1 ANTI LEFT JOIN anti_left_join j USING(x) ORDER BY x, str, s; SELECT 'anti right'; SELECT * FROM t1 ANTI RIGHT JOIN anti_right_join j USING(x) ORDER BY x, str, s; +-- run queries once more time (issue #16991) + +SELECT 'any left'; +SELECT * FROM t1 ANY LEFT JOIN any_left_join j USING(x) ORDER BY x, str, s; + +SELECT 'any inner'; +SELECT * FROM t1 ANY INNER JOIN any_inner_join j USING(x) ORDER BY x, str, s; + +SELECT 'any right'; +SELECT * FROM t1 ANY RIGHT JOIN any_right_join j USING(x) ORDER BY x, str, s; + +SELECT 'semi left'; +SELECT * FROM t1 SEMI LEFT JOIN semi_left_join j USING(x) ORDER BY x, str, s; + +SELECT 'semi right'; +SELECT * FROM t1 SEMI RIGHT JOIN semi_right_join j USING(x) ORDER BY x, str, s; + +SELECT 'anti left'; +SELECT * FROM t1 ANTI LEFT JOIN anti_left_join j USING(x) ORDER BY x, str, s; + +SELECT 'anti right'; +SELECT * FROM t1 ANTI RIGHT JOIN anti_right_join j USING(x) ORDER BY x, str, s; + DROP TABLE t1; DROP TABLE any_left_join; diff --git a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql index 4ae655b1ec9..222c05ae827 100644 --- a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql +++ b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql @@ -1,3 +1,5 @@ +-- set insert_distributed_sync = 1; -- see https://github.com/ClickHouse/ClickHouse/issues/18971 + DROP TABLE IF EXISTS local_01099_a; DROP TABLE IF EXISTS local_01099_b; DROP TABLE IF EXISTS distributed_01099_a; diff --git a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference index 1a9e5685a6a..71be9c3fb5b 100644 --- a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference +++ b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.reference @@ -1,3 +1,4 @@ 1 2019-01-05 2020-01-10 1 +1 date_table somedict diff --git a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql index 6ad76ee5a7e..471fd7959a9 100644 --- a/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql +++ b/tests/queries/0_stateless/01125_dict_ddl_cannot_add_column.sql @@ -29,6 +29,9 @@ LIFETIME(MIN 300 MAX 360); SELECT * from somedict; +-- No dictionary columns +SELECT 1 FROM somedict; + SHOW TABLES; -DROP DATABASE IF EXISTS database_for_dict; +DROP DATABASE database_for_dict; diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.expect b/tests/queries/0_stateless/01176_mysql_client_interactive.expect new file mode 100755 index 00000000000..d592bbe1ce2 --- /dev/null +++ b/tests/queries/0_stateless/01176_mysql_client_interactive.expect @@ -0,0 +1,26 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 5 +match_max 100000 +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$MYSQL_CLIENT_BINARY \$MYSQL_CLIENT_OPT" +expect "mysql> " + +send -- "USE system;\r" +expect "Database changed" + +send -- "SELECT * FROM one;\r" +expect "| dummy |" +expect "| 0 |" +expect "1 row in set" + +send -- "quit;\r" +expect eof diff --git a/tests/queries/0_stateless/01176_mysql_client_interactive.reference 
b/tests/queries/0_stateless/01176_mysql_client_interactive.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01177_group_array_moving.reference b/tests/queries/0_stateless/01177_group_array_moving.reference new file mode 100644 index 00000000000..d74c84bb94f --- /dev/null +++ b/tests/queries/0_stateless/01177_group_array_moving.reference @@ -0,0 +1,2 @@ +[-9223372036854775808,0,-9223372036854775808,0,-9223372036854775808,0] [18446744073709551615,18446744073709551614,18446744073709551613,18446744073709551612,18446744073709551611,18446744073709551610] [0,9223372036854775807,9223372036854775805,9223372036854775805,18446744073709551612,18446744073709551610] +[-35888607147294850,-71777214294589700,-107665821441884540,-143554428589179400,-179443035736474240,-215331642883769100] [17592202821648,35184405643296,52776608464944,70368811286592,87961014108240,105553216929888] [0,1,3,3,4,6] diff --git a/tests/queries/0_stateless/01177_group_array_moving.sql b/tests/queries/0_stateless/01177_group_array_moving.sql new file mode 100644 index 00000000000..5689cd95f75 --- /dev/null +++ b/tests/queries/0_stateless/01177_group_array_moving.sql @@ -0,0 +1,4 @@ +SELECT groupArrayMovingSum(257)(-9223372036854775808), groupArrayMovingSum(1048575)(18446744073709551615), groupArrayMovingSum(9223372036854775807)(number * 9223372036854775807) FROM remote('127.0.0.{1..2}', numbers(3)); +SELECT groupArrayMovingAvg(257)(-9223372036854775808), groupArrayMovingAvg(1048575)(18446744073709551615), groupArrayMovingAvg(9223372036854775807)(number * 9223372036854775807) FROM remote('127.0.0.{1..2}', numbers(3)); + +SELECT groupArrayMovingSum(257)(-9223372036854775808), groupArrayMovingSum(1)(10.000100135803223, [NULL, NULL], NULL), groupArrayMovingSum(NULL)(NULL) FROM numbers(1023) FORMAT Null; diff --git a/tests/queries/0_stateless/01178_int_field_to_decimal.reference b/tests/queries/0_stateless/01178_int_field_to_decimal.reference new file mode 100644 index 00000000000..6c256ba2032 --- /dev/null +++ b/tests/queries/0_stateless/01178_int_field_to_decimal.reference @@ -0,0 +1,2 @@ +9.00000000 +10.00000000 diff --git a/tests/queries/0_stateless/01178_int_field_to_decimal.sql b/tests/queries/0_stateless/01178_int_field_to_decimal.sql new file mode 100644 index 00000000000..bbd72e57d70 --- /dev/null +++ b/tests/queries/0_stateless/01178_int_field_to_decimal.sql @@ -0,0 +1,10 @@ +select d from values('d Decimal(8, 8)', 0, 1) where d not in (-1, 0); -- { serverError 69 } +select d from values('d Decimal(8, 8)', 0, 2) where d not in (1, 0); -- { serverError 69 } +select d from values('d Decimal(9, 8)', 0, 3) where d not in (-9223372036854775808, 0); -- { serverError 69 } +select d from values('d Decimal(9, 8)', 0, 4) where d not in (18446744073709551615, 0); -- { serverError 69 } +select d from values('d Decimal(18, 8)', 0, 5) where d not in (-9223372036854775808, 0); -- { serverError 69 } +select d from values('d Decimal(18, 8)', 0, 6) where d not in (18446744073709551615, 0); -- { serverError 69 } +select d from values('d Decimal(26, 8)', 0, 7) where d not in (-9223372036854775808, 0); -- { serverError 69 } +select d from values('d Decimal(27, 8)', 0, 8) where d not in (18446744073709551615, 0); -- { serverError 69 } +select d from values('d Decimal(27, 8)', 0, 9) where d not in (-9223372036854775808, 0); +select d from values('d Decimal(28, 8)', 0, 10) where d not in (18446744073709551615, 0); diff --git a/tests/queries/0_stateless/01179_insert_values_semicolon.expect 
b/tests/queries/0_stateless/01179_insert_values_semicolon.expect new file mode 100755 index 00000000000..c832be72c10 --- /dev/null +++ b/tests/queries/0_stateless/01179_insert_values_semicolon.expect @@ -0,0 +1,39 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 5 +match_max 100000 +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT" +expect ":) " + +send -- "DROP TABLE IF EXISTS test_01179\r" +expect "Ok." + +send -- "CREATE TABLE test_01179 (date DateTime) ENGINE=Memory()\r" +expect "Ok." + +send -- "INSERT INTO test_01179 values ('2020-01-01')\r" +expect "Ok." + +send -- "INSERT INTO test_01179 values ('2020-01-01'); \r" +expect "Ok." + +send -- "INSERT INTO test_01179 values ('2020-01-01'); (1) \r" +expect "Cannot read data after semicolon" + +send -- "SELECT date, count() FROM test_01179 GROUP BY date FORMAT TSV\r" +expect "2020-01-01 00:00:00\t3" + +send -- "DROP TABLE test_01179\r" +expect "Ok." + +send -- "\4" +expect eof diff --git a/tests/queries/0_stateless/01179_insert_values_semicolon.reference b/tests/queries/0_stateless/01179_insert_values_semicolon.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01180_client_syntax_errors.expect b/tests/queries/0_stateless/01180_client_syntax_errors.expect new file mode 100755 index 00000000000..bc775ce2c57 --- /dev/null +++ b/tests/queries/0_stateless/01180_client_syntax_errors.expect @@ -0,0 +1,32 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 5 +match_max 100000 +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +set basedir [file dirname $argv0] +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT" +expect ":) " + +# Make a query with syntax error +send -- "select \r" +expect "Syntax error: failed at position 7 (end of query):" +expect "Expected one of: " + +# Make another query with syntax error +send -- "CREATE TABLE t4 UUID '57f27aa5-141c-47c5-888a-9563681717f5' AS t1 (`rowNumberInAllBlocks()` UInt64, `toLowCardinality(arrayJoin(\['exchange', 'tables'\]))` LowCardinality(String)) ENGINE = MergeTree \r" +expect "Syntax error: failed at position 93 ('UInt64'):*" + +# Make a query with unmatched parentheses +send -- "select (1, 2\r" +expect "Syntax error: failed at position 8 ('('):" +expect "Unmatched parentheses: (" + +send -- "\4" +expect eof diff --git a/tests/queries/0_stateless/01180_client_syntax_errors.reference b/tests/queries/0_stateless/01180_client_syntax_errors.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01187_set_profile_as_setting.sh b/tests/queries/0_stateless/01187_set_profile_as_setting.sh index db9d095fe92..ec07f4d3687 100755 --- a/tests/queries/0_stateless/01187_set_profile_as_setting.sh +++ b/tests/queries/0_stateless/01187_set_profile_as_setting.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash +unset CLICKHOUSE_LOG_COMMENT + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01232_preparing_sets_race_condition.reference b/tests/queries/0_stateless/01232_preparing_sets_race_condition_long.reference similarity index 100% rename from tests/queries/0_stateless/01232_preparing_sets_race_condition.reference rename to tests/queries/0_stateless/01232_preparing_sets_race_condition_long.reference diff --git a/tests/queries/0_stateless/01232_preparing_sets_race_condition.sh b/tests/queries/0_stateless/01232_preparing_sets_race_condition_long.sh similarity index 100% rename from tests/queries/0_stateless/01232_preparing_sets_race_condition.sh rename to tests/queries/0_stateless/01232_preparing_sets_race_condition_long.sh diff --git a/tests/queries/0_stateless/01235_live_view_over_distributed.sql b/tests/queries/0_stateless/01235_live_view_over_distributed.sql index dd9ff80f30e..abc628475db 100644 --- a/tests/queries/0_stateless/01235_live_view_over_distributed.sql +++ b/tests/queries/0_stateless/01235_live_view_over_distributed.sql @@ -1,3 +1,4 @@ +set insert_distributed_sync = 1; SET allow_experimental_live_view = 1; DROP TABLE IF EXISTS lv; @@ -7,7 +8,7 @@ DROP TABLE IF EXISTS visits_layer; CREATE TABLE visits(StartDate Date) ENGINE MergeTree ORDER BY(StartDate); CREATE TABLE visits_layer(StartDate Date) ENGINE Distributed(test_cluster_two_shards_localhost, currentDatabase(), 'visits', rand()); -CREATE LIVE VIEW lv AS SELECT * FROM visits_layer ORDER BY StartDate; +CREATE LIVE VIEW lv AS SELECT * FROM visits_layer ORDER BY StartDate; INSERT INTO visits_layer (StartDate) VALUES ('2020-01-01'); INSERT INTO visits_layer (StartDate) VALUES ('2020-01-02'); diff --git a/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql b/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql index a572074de3c..de35b0c6c9d 100644 --- a/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql +++ b/tests/queries/0_stateless/01237_live_view_over_distributed_with_subquery_select_table_alias.sql @@ -1,4 +1,5 @@ SET allow_experimental_live_view = 1; +SET insert_distributed_sync = 1; DROP TABLE IF EXISTS lv; DROP TABLE IF EXISTS visits; diff --git a/tests/queries/0_stateless/01238_http_memory_tracking.sh b/tests/queries/0_stateless/01238_http_memory_tracking.sh index 90a7611c7c7..8c900e4c208 100755 --- a/tests/queries/0_stateless/01238_http_memory_tracking.sh +++ b/tests/queries/0_stateless/01238_http_memory_tracking.sh @@ -18,3 +18,6 @@ yes 'SELECT 1' 2>/dev/null | { } | grep -x -c 1 wait + +# Reset max_memory_usage_for_user, so it will not affect other tests +${CLICKHOUSE_CLIENT} --max_memory_usage_for_user=0 -q "SELECT 1 FORMAT Null" diff --git a/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference b/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference index 096d5703292..72a41ac1d84 100644 --- a/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference +++ b/tests/queries/0_stateless/01259_combinator_distinct_distributed.reference @@ -2,3 +2,7 @@ [0,1,2,3,4,5,6,7,8,9,10,11,12] 20 0.49237 +78 +[0,1,2,3,4,5,6,7,8,9,10,11,12] +20 +0.49237 diff --git a/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql b/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql index f851e64dbcb..f95d2d87b8e 100644 --- a/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql +++ 
b/tests/queries/0_stateless/01259_combinator_distinct_distributed.sql @@ -1,3 +1,12 @@ +SET distributed_aggregation_memory_efficient = 1; + +SELECT sum(DISTINCT number % 13) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); +SELECT arraySort(groupArray(DISTINCT number % 13)) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); +SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); +SELECT round(corrStable(DISTINCT x, y), 5) FROM (SELECT number % 10 AS x, number % 5 AS y FROM remote('127.0.0.{1,2}', numbers(1000))); + +SET distributed_aggregation_memory_efficient = 0; + SELECT sum(DISTINCT number % 13) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); SELECT arraySort(groupArray(DISTINCT number % 13)) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM remote('127.0.0.{1,2}', numbers_mt(100000)); diff --git a/tests/queries/0_stateless/01274_alter_rename_column_distributed.sql b/tests/queries/0_stateless/01274_alter_rename_column_distributed.sql index a35dc7cca56..8799680125f 100644 --- a/tests/queries/0_stateless/01274_alter_rename_column_distributed.sql +++ b/tests/queries/0_stateless/01274_alter_rename_column_distributed.sql @@ -1,3 +1,5 @@ +set insert_distributed_sync = 1; + DROP TABLE IF EXISTS visits; DROP TABLE IF EXISTS visits_dist; diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.reference b/tests/queries/0_stateless/01280_ttl_where_group_by.reference index ad20d38f2e6..7fe00709dee 100644 --- a/tests/queries/0_stateless/01280_ttl_where_group_by.reference +++ b/tests/queries/0_stateless/01280_ttl_where_group_by.reference @@ -1,20 +1,26 @@ +ttl_01280_1 1 1 0 4 1 2 3 7 1 3 0 5 2 1 0 1 2 1 20 1 +ttl_01280_2 1 1 [0,2,3] 4 1 1 [5,4,1] 13 1 3 [1,0,1,0] 17 2 1 [3,1,0,3] 8 3 1 [2,4,5] 8 +ttl_01280_3 1 1 0 4 -1 3 10 6 +1 1 10 6 2 1 0 3 -3 5 8 2 +3 1 8 2 +ttl_01280_4 1 1 0 4 -3 3 13 9 +10 2 13 9 +ttl_01280_5 1 2 7 5 2 3 6 5 -1 2 3 5 -2 3 3 5 +ttl_01280_6 +1 5 3 5 +2 10 3 5 diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by.sh b/tests/queries/0_stateless/01280_ttl_where_group_by.sh index 5ca79951a46..9f30c7c5872 100755 --- a/tests/queries/0_stateless/01280_ttl_where_group_by.sh +++ b/tests/queries/0_stateless/01280_ttl_where_group_by.sh @@ -14,6 +14,7 @@ function optimize() done } +echo "ttl_01280_1" $CLICKHOUSE_CLIENT -n --query " create table ttl_01280_1 (a Int, b Int, x Int, y Int, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second delete where x % 10 == 0 and y > 5; insert into ttl_01280_1 values (1, 1, 0, 4, now() + 10); @@ -30,6 +31,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_1 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_2" +echo "ttl_01280_2" $CLICKHOUSE_CLIENT -n --query " create table ttl_01280_2 (a Int, b Int, x Array(Int32), y Double, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b set x = minForEach(x), y = sum(y), d = max(d); insert into ttl_01280_2 values (1, 1, array(0, 2, 3), 4, now() + 10); @@ -48,8 +50,9 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_2 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_3" +echo "ttl_01280_3" $CLICKHOUSE_CLIENT -n --query " -create table ttl_01280_3 (a Int, b Int, x Int64, y Int, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set x = argMax(x, d), y = argMax(y, d), d = max(d); +create 
table ttl_01280_3 (a Int, b Int, x Int64, y Int, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set b = min(b), x = argMax(x, d), y = argMax(y, d), d = max(d); insert into ttl_01280_3 values (1, 1, 0, 4, now() + 10); insert into ttl_01280_3 values (1, 1, 10, 6, now() + 1); insert into ttl_01280_3 values (1, 2, 3, 7, now()); @@ -66,6 +69,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_3 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_4" +echo "ttl_01280_4" $CLICKHOUSE_CLIENT -n --query " create table ttl_01280_4 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), -(a + b)) ttl d + interval 1 second group by toDate(d) set x = sum(x), y = max(y); insert into ttl_01280_4 values (1, 1, 0, 4, now() + 10); @@ -80,7 +84,8 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_4 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_5" -$CLICKHOUSE_CLIENT -n --query "create table ttl_01280_5 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a set x = sum(x); +echo "ttl_01280_5" +$CLICKHOUSE_CLIENT -n --query "create table ttl_01280_5 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a set x = sum(x), b = argMax(b, -b); insert into ttl_01280_5 values (1, 2, 3, 5, now()); insert into ttl_01280_5 values (2, 10, 1, 5, now()); insert into ttl_01280_5 values (2, 3, 5, 5, now()); @@ -92,6 +97,7 @@ $CLICKHOUSE_CLIENT --query "select a, b, x, y from ttl_01280_5 ORDER BY a, b, x, $CLICKHOUSE_CLIENT --query "drop table if exists ttl_01280_6" +echo "ttl_01280_6" $CLICKHOUSE_CLIENT -n --query " create table ttl_01280_6 (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a, -b) ttl d + interval 1 second group by toDate(d), a; insert into ttl_01280_6 values (1, 2, 3, 5, now()); diff --git a/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql b/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql index f2c26a3d495..b273e065bcc 100644 --- a/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql +++ b/tests/queries/0_stateless/01280_ttl_where_group_by_negative.sql @@ -1,7 +1,4 @@ create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by x set y = max(y); -- { serverError 450} create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by b set y = max(y); -- { serverError 450} create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b, x set y = max(y); -- { serverError 450} -create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a set b = min(b), y = max(y); -- { serverError 450} create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (a, b) ttl d + interval 1 second group by a, b set y = max(y), y = max(y); -- { serverError 450} -create table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (toDate(d), a) ttl d + interval 1 second group by toDate(d), a set d = min(d), b = max(b); -- { serverError 450} -create 
table ttl_01280_error (a Int, b Int, x Int64, y Int64, d DateTime) engine = MergeTree order by (d, -(a + b)) ttl d + interval 1 second group by d, -(a + b) set a = sum(a), b = min(b); -- { serverError 450} diff --git a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index 285e2ab8dad..4667c76cb60 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -33,7 +33,7 @@ function execute_group_by() "--max_memory_usage_for_user="$((150<<20)) "--max_threads=2" ) - execute_null "${opts[@]}" <<<'SELECT uniq(number) FROM numbers_mt(toUInt64(1e6)) GROUP BY number % 5e5' + execute_null "${opts[@]}" <<<'SELECT uniq(number) FROM numbers_mt(1e6) GROUP BY number % 5e5' } # This is needed to keep at least one running query for user for the time of test. @@ -42,3 +42,6 @@ execute_group_by # if memory accounting will be incorrect, the second query will be failed with MEMORY_LIMIT_EXCEEDED execute_group_by wait + +# Reset max_memory_usage_for_user, so it will not affect other tests +${CLICKHOUSE_CLIENT} --max_memory_usage_for_user=0 -q "SELECT 1 FORMAT Null" diff --git a/tests/queries/0_stateless/01297_create_quota.reference b/tests/queries/0_stateless/01297_create_quota.reference index b637f4f3296..375d67346be 100644 --- a/tests/queries/0_stateless/01297_create_quota.reference +++ b/tests/queries/0_stateless/01297_create_quota.reference @@ -57,7 +57,10 @@ q2_01297 local directory [] [5259492] 0 ['r1_01297','u1_01297'] [] q3_01297 local directory ['client_key','user_name'] [5259492,15778476] 0 [] [] q4_01297 local directory [] [604800] 1 [] ['u1_01297'] -- system.quota_limits -q2_01297 5259492 0 100 11 1000 10000 1001 10001 2.5 -q3_01297 5259492 0 \N \N 1002 \N \N \N \N -q3_01297 15778476 0 100 11 \N \N \N \N \N -q4_01297 604800 0 \N \N \N \N \N \N \N +q2_01297 5259492 0 100 \N \N 11 1000 10000 1001 10001 2.5 +q3_01297 5259492 0 \N \N \N \N 1002 \N \N \N \N +q3_01297 15778476 0 100 \N \N 11 \N \N \N \N \N +q4_01297 604800 0 \N \N \N \N \N \N \N \N \N +-- query_selects query_inserts +CREATE QUOTA q1_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_selects = 1 TO r1_01297 +CREATE QUOTA q2_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_inserts = 1 TO r1_01297 diff --git a/tests/queries/0_stateless/01297_create_quota.sql b/tests/queries/0_stateless/01297_create_quota.sql index a3fb8331e16..7d55b95601f 100644 --- a/tests/queries/0_stateless/01297_create_quota.sql +++ b/tests/queries/0_stateless/01297_create_quota.sql @@ -125,5 +125,13 @@ SELECT '-- system.quota_limits'; SELECT * FROM system.quota_limits WHERE quota_name LIKE 'q%\_01297' ORDER BY quota_name, duration; DROP QUOTA q1_01297, q2_01297, q3_01297, q4_01297; +SELECT '-- query_selects query_inserts'; +CREATE QUOTA q1_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_selects = 1 TO r1_01297; +CREATE QUOTA q2_01297 KEYED BY user_name FOR INTERVAL 1 minute MAX query_inserts = 1 TO r1_01297; +SHOW CREATE QUOTA q1_01297; +SHOW CREATE QUOTA q2_01297; +DROP QUOTA q1_01297, q2_01297; + DROP ROLE r1_01297; DROP USER u1_01297; + diff --git a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql index 9a554ead776..596e90adfd6 100644 --- a/tests/queries/0_stateless/01414_low_cardinality_nullable.sql +++ b/tests/queries/0_stateless/01414_low_cardinality_nullable.sql @@ -1,7 +1,7 @@ DROP TABLE IF 
EXISTS lc_nullable; CREATE TABLE lc_nullable ( - order_key Array(LowCardinality(Nullable((UInt64)))), + order_key Array(LowCardinality(Nullable(UInt64))), i8 Array(LowCardinality(Nullable(Int8))), i16 Array(LowCardinality(Nullable(Int16))), @@ -14,10 +14,10 @@ CREATE TABLE lc_nullable ( f32 Array(LowCardinality(Nullable(Float32))), f64 Array(LowCardinality(Nullable(Float64))), - date Array(LowCardinality(Nullable((Date)))), + date Array(LowCardinality(Nullable(Date))), date_time Array(LowCardinality(Nullable(DateTime('Europe/Moscow')))), - str Array(LowCardinality(Nullable((String)))), + str Array(LowCardinality(Nullable(String))), fixed_string Array(LowCardinality(Nullable(FixedString(5)))) ) ENGINE = MergeTree() ORDER BY order_key; diff --git a/tests/queries/0_stateless/01443_merge_truncate_long.reference b/tests/queries/0_stateless/01443_merge_truncate_long.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01443_merge_truncate.sh b/tests/queries/0_stateless/01443_merge_truncate_long.sh similarity index 100% rename from tests/queries/0_stateless/01443_merge_truncate.sh rename to tests/queries/0_stateless/01443_merge_truncate_long.sh diff --git a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh index 24667f26363..bf1d5b31682 100755 --- a/tests/queries/0_stateless/01455_opentelemetry_distributed.sh +++ b/tests/queries/0_stateless/01455_opentelemetry_distributed.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash set -ue +unset CLICKHOUSE_LOG_COMMENT + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql b/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql index de470fe6a57..5b59bc065dd 100644 --- a/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql +++ b/tests/queries/0_stateless/01455_optimize_trivial_insert_select.sql @@ -1,7 +1,9 @@ SET max_insert_threads = 1, max_threads = 100, min_insert_block_size_rows = 1048576, max_block_size = 65536; -CREATE TEMPORARY TABLE t (x UInt64); +DROP TABLE IF EXISTS t; +CREATE TABLE t (x UInt64) ENGINE = StripeLog; -- For trivial INSERT SELECT, max_threads is lowered to max_insert_threads and max_block_size is changed to min_insert_block_size_rows. INSERT INTO t SELECT * FROM numbers_mt(1000000); SET max_threads = 1; -- If data was inserted by more threads, we will probably see data out of order. SELECT DISTINCT blockSize(), runningDifference(x) FROM t; +DROP TABLE t; diff --git a/tests/queries/0_stateless/01457_create_as_table_function_structure.sql b/tests/queries/0_stateless/01457_create_as_table_function_structure.sql index 1c9c1e1ef44..9399f06220b 100644 --- a/tests/queries/0_stateless/01457_create_as_table_function_structure.sql +++ b/tests/queries/0_stateless/01457_create_as_table_function_structure.sql @@ -18,6 +18,9 @@ DROP TABLE tmp; DETACH DATABASE test_01457; ATTACH DATABASE test_01457; +-- To suppress "Structure does not match (...), implicit conversion will be done." 
message +SET send_logs_level='error'; + CREATE TABLE tmp (n Int8) ENGINE=Memory; INSERT INTO test_01457.tf_remote_explicit_structure VALUES ('42'); SELECT * FROM tmp; diff --git a/tests/queries/0_stateless/01475_read_subcolumns.sql b/tests/queries/0_stateless/01475_read_subcolumns.sql index ce85dd72abf..16832c4fc59 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns.sql @@ -61,3 +61,8 @@ SELECT ProfileEvents.Values[indexOf(ProfileEvents.Names, 'FileOpen')] FROM system.query_log WHERE (type = 'QueryFinish') AND (lower(query) LIKE lower('SELECT m.% FROM %t_map%')) AND event_time > now() - INTERVAL 10 SECOND AND current_database = currentDatabase(); + +DROP TABLE t_arr; +DROP TABLE t_nul; +DROP TABLE t_tup; +DROP TABLE t_map; diff --git a/tests/queries/0_stateless/01475_read_subcolumns_2.sql b/tests/queries/0_stateless/01475_read_subcolumns_2.sql index b8959cf27f7..e827d6c360a 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns_2.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns_2.sql @@ -12,7 +12,7 @@ CREATE TABLE subcolumns arr2 Array(Array(Nullable(String))), lc LowCardinality(String), nested Nested(col1 String, col2 Nullable(UInt32)) -) +) ENGINE = MergeTree order by tuple() SETTINGS min_bytes_for_wide_part = '10M'; INSERT INTO subcolumns VALUES (([1, NULL], 2, 'a'), ['foo', NULL, 'bar'], [['123'], ['456', '789']], 'qqqq', ['zzz', 'xxx'], [42, 43]); @@ -37,7 +37,7 @@ CREATE TABLE subcolumns arr2 Array(Array(Nullable(String))), lc LowCardinality(String), nested Nested(col1 String, col2 Nullable(UInt32)) -) +) ENGINE = MergeTree order by tuple() SETTINGS min_bytes_for_wide_part = 0; INSERT INTO subcolumns VALUES (([1, NULL], 2, 'a'), ['foo', NULL, 'bar'], [['123'], ['456', '789']], 'qqqq', ['zzz', 'xxx'], [42, 43]); @@ -47,3 +47,5 @@ SELECT t.a.size0, t.a.null, t.u, t.s, t.s.null FROM subcolumns; SELECT sumArray(arr.null), sum(arr.size0) FROM subcolumns; SELECT arr2, arr2.size0, arr2.size1, arr2.null FROM subcolumns; -- SELECT nested.col1, nested.col2, nested.size0, nested.size0, nested.col2.null FROM subcolumns; + +DROP TABLE subcolumns; diff --git a/tests/queries/0_stateless/01475_read_subcolumns_3.sql b/tests/queries/0_stateless/01475_read_subcolumns_3.sql index 66bcd7dbc91..54598f19bdc 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns_3.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns_3.sql @@ -37,3 +37,5 @@ SELECT count() FROM map_subcolumns PREWHERE has(m.keys, 'b'); SELECT id, m.size0 FROM map_subcolumns; SELECT count() FROM map_subcolumns WHERE m.size0 > 2; + +DROP TABLE map_subcolumns; diff --git a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh index 684d65ceb25..be22b1b4185 100755 --- a/tests/queries/0_stateless/01475_read_subcolumns_storages.sh +++ b/tests/queries/0_stateless/01475_read_subcolumns_storages.sh @@ -23,3 +23,5 @@ for engine in "${ENGINES[@]}"; do $CLICKHOUSE_CLIENT --query "SELECT * FROM subcolumns" $CLICKHOUSE_CLIENT --query "SELECT n, n.null, a1, a1.size0, a2, a2.size0, a2.size1, a2.size2, a3, a3.size0, a3.null, t, t.s, t.v, m, m.keys, m.values FROM subcolumns" done + +$CLICKHOUSE_CLIENT -q "DROP TABLE subcolumns" diff --git a/tests/queries/0_stateless/01506_ttl_same_with_order_by.reference b/tests/queries/0_stateless/01506_ttl_same_with_order_by.reference new file mode 100644 index 00000000000..f8f36434a82 --- /dev/null +++ 
b/tests/queries/0_stateless/01506_ttl_same_with_order_by.reference @@ -0,0 +1,4 @@ +2020-01-01 00:00:00 3 +2020-01-01 00:00:00 2020-01-01 00:00:00 111 +1 +0 diff --git a/tests/queries/0_stateless/01506_ttl_same_with_order_by.sql b/tests/queries/0_stateless/01506_ttl_same_with_order_by.sql new file mode 100644 index 00000000000..7a0fb86330b --- /dev/null +++ b/tests/queries/0_stateless/01506_ttl_same_with_order_by.sql @@ -0,0 +1,78 @@ +DROP TABLE IF EXISTS derived_metrics_local; + +CREATE TABLE derived_metrics_local +( + timestamp DateTime, + bytes UInt64 +) +ENGINE=SummingMergeTree() +PARTITION BY toYYYYMMDD(timestamp) +ORDER BY (toStartOfHour(timestamp), timestamp) +TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR GROUP BY toStartOfHour(timestamp) +SET bytes=max(bytes); + +INSERT INTO derived_metrics_local values('2020-01-01 00:00:00', 1); +INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 3); +INSERT INTO derived_metrics_local values('2020-01-01 00:02:00', 2); + +OPTIMIZE TABLE derived_metrics_local FINAL; +SELECT * FROM derived_metrics_local; + +DROP TABLE derived_metrics_local; + +CREATE TABLE derived_metrics_local +( + timestamp DateTime, + timestamp_h DateTime materialized toStartOfHour(timestamp), + bytes UInt64 +) +ENGINE=SummingMergeTree() +PARTITION BY toYYYYMMDD(timestamp) +ORDER BY (timestamp_h, timestamp) +TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR GROUP BY timestamp_h +SET bytes=max(bytes), timestamp = toStartOfHour(any(timestamp)); + +INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 111); +INSERT INTO derived_metrics_local values('2020-01-01 00:19:22', 22); +INSERT INTO derived_metrics_local values('2020-01-01 00:59:02', 1); + +OPTIMIZE TABLE derived_metrics_local FINAL; +SELECT timestamp, timestamp_h, bytes FROM derived_metrics_local; + +DROP TABLE IF EXISTS derived_metrics_local; + +CREATE TABLE derived_metrics_local +( + timestamp DateTime, + bytes UInt64 TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR +) +ENGINE=MergeTree() +ORDER BY (toStartOfHour(timestamp), timestamp) +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 111) ('2020-01-01 00:19:22', 22) ('2100-01-01 00:19:22', 1); + +OPTIMIZE TABLE derived_metrics_local FINAL; +SELECT sum(bytes) FROM derived_metrics_local; + +DROP TABLE IF EXISTS derived_metrics_local; + +CREATE TABLE derived_metrics_local +( + timestamp DateTime, + bytes UInt64 +) +ENGINE=MergeTree() +PARTITION BY toYYYYMMDD(timestamp) +ORDER BY (toStartOfHour(timestamp), timestamp) +TTL toStartOfHour(timestamp) + INTERVAL 1 HOUR +SETTINGS min_bytes_for_wide_part = 0; + +INSERT INTO derived_metrics_local values('2020-01-01 00:01:00', 111); +INSERT INTO derived_metrics_local values('2020-01-01 00:19:22', 22); +INSERT INTO derived_metrics_local values('2020-01-01 00:59:02', 1); + +OPTIMIZE TABLE derived_metrics_local FINAL; +SELECT count() FROM derived_metrics_local; + +DROP TABLE IF EXISTS derived_metrics_local; diff --git a/tests/queries/0_stateless/01508_partition_pruning.reference b/tests/queries/0_stateless/01508_partition_pruning.reference deleted file mode 100644 index 0cc40d23b41..00000000000 --- a/tests/queries/0_stateless/01508_partition_pruning.reference +++ /dev/null @@ -1,244 +0,0 @@ ---------- tMM ---------------------------- -select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-15'); -0 0 -Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() 
from tMM where toDate(d)=toDate('2020-09-01'); -2 2880 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-10-15'); -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toDate(d)='2020-09-15'; -0 0 -Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d)=202009; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816; -2 2880 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015; -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toDate(d)='2020-10-15'; -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00'; -3 15000 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00'); -6 30000 -Selected 6 parts by partition key, 6 parts by primary key, 6 marks by primary key, 6 marks to read from 6 ranges - -select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00'); -0 0 -Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00'; -2 6440 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00'; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00'; -2 2880 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-10-01 00:00:00'; -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-15 00:00:00' and d < '2020-10-16 00:00:00'; -2 6440 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; -4 20000 -Selected 4 parts by partition key, 4 parts by primary key, 4 marks by primary key, 4 marks to read from 4 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202009; -2 10000 -Selected 2 parts by partition key, 2 parts by primary 
key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010 and toStartOfDay(d) = '2020-10-01 00:00:00'; -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d) >= 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; -3 11440 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d) > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; -3 11440 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010 and toStartOfDay(d) < '2020-10-02 00:00:00'; -1 1440 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; -3 9999 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-15'; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01'; -4 20000 -Selected 4 parts by partition key, 4 parts by primary key, 4 marks by primary key, 4 marks to read from 4 ranges - -select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01' and toStartOfMonth(d) < '2020-10-01'; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; -2 9999 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; -1 10000 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; -2 20000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - ---------- tDD ---------------------------- -select uniqExact(_part), count() from tDD where toDate(d)=toDate('2020-09-24'); -1 10000 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() FROM tDD WHERE toDate(d) = toDate('2020-09-24'); -1 10000 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() FROM tDD WHERE toDate(d) = '2020-09-24'; -1 10000 -Selected 1 parts by partition key, 1 parts by primary key, 1 
marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() FROM tDD WHERE toDate(d) >= '2020-09-23' and toDate(d) <= '2020-09-26'; -3 40000 -Selected 3 parts by partition key, 3 parts by primary key, 4 marks by primary key, 4 marks to read from 3 ranges - -select uniqExact(_part), count() FROM tDD WHERE toYYYYMMDD(d) >= 20200923 and toDate(d) <= '2020-09-26'; -3 40000 -Selected 3 parts by partition key, 3 parts by primary key, 4 marks by primary key, 4 marks to read from 3 ranges - ---------- sDD ---------------------------- -select uniqExact(_part), count() from sDD; -6 30000 -Selected 6 parts by partition key, 6 parts by primary key, 6 marks by primary key, 6 marks to read from 6 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1)+1 = 202010; -3 9999 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202010; -2 9999 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202110; -0 0 -Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toStartOfDay(toDateTime(intDiv(d,1000),'UTC')) < toDateTime('2020-10-02 00:00:00','UTC'); -3 11440 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toDateTime(intDiv(d,1000),'UTC') < toDateTime('2020-10-01 00:00:00','UTC'); -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from sDD where d >= 1598918400000; -4 20000 -Selected 4 parts by partition key, 4 parts by primary key, 4 marks by primary key, 4 marks to read from 4 ranges - -select uniqExact(_part), count() from sDD where d >= 1598918400000 and toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) < 202010; -3 10001 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - ---------- xMM ---------------------------- -select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00'; -3 10001 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00'; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a=1; -1 1 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 
00:00:00' and a<>3; -2 5001 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; -1 5000 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-11-01 00:00:00' and a = 1; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where a = 1; -3 15000 -Selected 3 parts by partition key, 3 parts by primary key, 3 marks by primary key, 3 marks to read from 3 ranges - -select uniqExact(_part), count() from xMM where a = 66; -0 0 -Selected 0 parts by partition key, 0 parts by primary key, 0 marks by primary key, 0 marks to read from 0 ranges - -select uniqExact(_part), count() from xMM where a <> 66; -6 30000 -Selected 6 parts by partition key, 6 parts by primary key, 6 marks by primary key, 6 marks to read from 6 ranges - -select uniqExact(_part), count() from xMM where a = 2; -2 10000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where a = 1; -2 15000 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; -1 10000 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - -select uniqExact(_part), count() from xMM where a <> 66; -5 30000 -Selected 5 parts by partition key, 5 parts by primary key, 5 marks by primary key, 5 marks to read from 5 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; -2 5001 -Selected 2 parts by partition key, 2 parts by primary key, 2 marks by primary key, 2 marks to read from 2 ranges - -select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; -1 5000 -Selected 1 parts by partition key, 1 parts by primary key, 1 marks by primary key, 1 marks to read from 1 ranges - diff --git a/tests/queries/0_stateless/01508_partition_pruning.queries b/tests/queries/0_stateless/01508_partition_pruning_long.queries similarity index 100% rename from tests/queries/0_stateless/01508_partition_pruning.queries rename to tests/queries/0_stateless/01508_partition_pruning_long.queries diff --git a/tests/queries/0_stateless/01508_partition_pruning_long.reference b/tests/queries/0_stateless/01508_partition_pruning_long.reference new file mode 100644 index 00000000000..70f529c6058 --- /dev/null +++ b/tests/queries/0_stateless/01508_partition_pruning_long.reference @@ -0,0 +1,244 @@ +--------- tMM ---------------------------- +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-15'); +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-09-01'); +2 2880 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toDate(d)=toDate('2020-10-15'); +1 
1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toDate(d)='2020-09-15'; +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)=202009; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20200816; +2 2880 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMMDD(d)=20201015; +1 1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toDate(d)='2020-10-15'; +1 1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where d >= '2020-09-01 00:00:00' and d<'2020-10-15 00:00:00'; +3 15000 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from tMM where d >= '2020-01-16 00:00:00' and d < toDateTime('2021-08-17 00:00:00'); +6 30000 +Selected 6/6 parts by partition key, 6 parts by primary key, 6/12 marks by primary key, 6 marks to read from 6 ranges + +select uniqExact(_part), count() from tMM where d >= '2020-09-16 00:00:00' and d < toDateTime('2020-10-01 00:00:00'); +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from tMM where d >= '2020-09-12 00:00:00' and d < '2020-10-16 00:00:00'; +2 6440 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-12 00:00:00'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-09-01 00:00:00'; +2 2880 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toStartOfDay(d) = '2020-10-01 00:00:00'; +1 1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toStartOfDay(d) >= '2020-09-15 00:00:00' and d < '2020-10-16 00:00:00'; +2 6440 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; +4 20000 +Selected 4/6 parts by partition key, 4 parts by primary key, 4/8 marks by primary key, 4 marks to read from 4 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202009; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010 and toStartOfDay(d) = '2020-10-01 
00:00:00'; +1 1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) >= 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +3 11440 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +1 1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202009 and toStartOfDay(d) < '2020-10-02 00:00:00'; +3 11440 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010 and toStartOfDay(d) < '2020-10-02 00:00:00'; +1 1440 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; +3 9999 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-15'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01'; +4 20000 +Selected 4/6 parts by partition key, 4 parts by primary key, 4/8 marks by primary key, 4 marks to read from 4 ranges + +select uniqExact(_part), count() from tMM where toStartOfMonth(d) >= '2020-09-01' and toStartOfMonth(d) < '2020-10-01'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d-1)+1 = 202010; +2 9999 +Selected 2/3 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d)+1 > 202010; +1 10000 +Selected 1/3 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from tMM where toYYYYMM(d) between 202009 and 202010; +2 20000 +Selected 2/3 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +--------- tDD ---------------------------- +select uniqExact(_part), count() from tDD where toDate(d)=toDate('2020-09-24'); +1 10000 +Selected 1/4 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) = toDate('2020-09-24'); +1 10000 +Selected 1/4 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) = '2020-09-24'; +1 10000 +Selected 1/4 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() FROM tDD WHERE toDate(d) >= 
'2020-09-23' and toDate(d) <= '2020-09-26'; +3 40000 +Selected 3/4 parts by partition key, 3 parts by primary key, 4/7 marks by primary key, 4 marks to read from 3 ranges + +select uniqExact(_part), count() FROM tDD WHERE toYYYYMMDD(d) >= 20200923 and toDate(d) <= '2020-09-26'; +3 40000 +Selected 3/4 parts by partition key, 3 parts by primary key, 4/7 marks by primary key, 4 marks to read from 3 ranges + +--------- sDD ---------------------------- +select uniqExact(_part), count() from sDD; +6 30000 +Selected 6/6 parts by partition key, 6 parts by primary key, 6/12 marks by primary key, 6 marks to read from 6 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1)+1 = 202010; +3 9999 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202010; +2 9999 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) = 202110; +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toStartOfDay(toDateTime(intDiv(d,1000),'UTC')) < toDateTime('2020-10-02 00:00:00','UTC'); +3 11440 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from sDD where toYYYYMM(toDateTime(intDiv(d,1000),'UTC'))+1 > 202009 and toDateTime(intDiv(d,1000),'UTC') < toDateTime('2020-10-01 00:00:00','UTC'); +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from sDD where d >= 1598918400000; +4 20000 +Selected 4/6 parts by partition key, 4 parts by primary key, 4/8 marks by primary key, 4 marks to read from 4 ranges + +select uniqExact(_part), count() from sDD where d >= 1598918400000 and toYYYYMM(toDateTime(intDiv(d,1000),'UTC')-1) < 202010; +3 10001 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +--------- xMM ---------------------------- +select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00'; +3 10001 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00'; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a=1; +1 1 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; +2 5001 +Selected 2/6 parts by partition key, 
2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; +1 5000 +Selected 1/6 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-11-01 00:00:00' and a = 1; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where a = 1; +3 15000 +Selected 3/6 parts by partition key, 3 parts by primary key, 3/6 marks by primary key, 3 marks to read from 3 ranges + +select uniqExact(_part), count() from xMM where a = 66; +0 0 +Selected 0/6 parts by partition key, 0 parts by primary key, 0/0 marks by primary key, 0 marks to read from 0 ranges + +select uniqExact(_part), count() from xMM where a <> 66; +6 30000 +Selected 6/6 parts by partition key, 6 parts by primary key, 6/12 marks by primary key, 6 marks to read from 6 ranges + +select uniqExact(_part), count() from xMM where a = 2; +2 10000 +Selected 2/6 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where a = 1; +2 15000 +Selected 2/5 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where toStartOfDay(d) >= '2020-10-01 00:00:00'; +1 10000 +Selected 1/5 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + +select uniqExact(_part), count() from xMM where a <> 66; +5 30000 +Selected 5/5 parts by partition key, 5 parts by primary key, 5/10 marks by primary key, 5 marks to read from 5 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d <= '2020-10-01 00:00:00' and a<>3; +2 5001 +Selected 2/5 parts by partition key, 2 parts by primary key, 2/4 marks by primary key, 2 marks to read from 2 ranges + +select uniqExact(_part), count() from xMM where d >= '2020-09-01 00:00:00' and d < '2020-10-01 00:00:00' and a<>3; +1 5000 +Selected 1/5 parts by partition key, 1 parts by primary key, 1/2 marks by primary key, 1 marks to read from 1 ranges + diff --git a/tests/queries/0_stateless/01508_partition_pruning.sh b/tests/queries/0_stateless/01508_partition_pruning_long.sh similarity index 88% rename from tests/queries/0_stateless/01508_partition_pruning.sh rename to tests/queries/0_stateless/01508_partition_pruning_long.sh index b5ec6388d5c..1b3c524ac77 100755 --- a/tests/queries/0_stateless/01508_partition_pruning.sh +++ b/tests/queries/0_stateless/01508_partition_pruning_long.sh @@ -4,8 +4,8 @@ # Description of test result: # Test the correctness of the partition # pruning -# -# Script executes queries from a file 01508_partition_pruning.queries (1 line = 1 query) +# +# Script executes queries from a file 01508_partition_pruning_long.queries (1 line = 1 query) # Queries are started with 'select' (but NOT with 'SELECT') are executed with log_level=debug #------------------------------------------------------------------------------------------- @@ -18,7 +18,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) #export CURDIR=. 
-queries="${CURDIR}/01508_partition_pruning.queries" +queries="${CURDIR}/01508_partition_pruning_long.queries" while IFS= read -r sql do [ -z "$sql" ] && continue @@ -30,9 +30,7 @@ do ${CLICKHOUSE_CLIENT} --query "$sql" 2>&1 | grep -oh "Selected .* parts by partition key, *. parts by primary key, .* marks by primary key, .* marks to read from .* ranges.*$" CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/--send_logs_level=debug/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/g') echo "" - else + else ${CLICKHOUSE_CLIENT} --query "$sql" - fi + fi done < "$queries" - - diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.reference b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql similarity index 92% rename from tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql rename to tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql index 6aa38a914f7..87c66609421 100644 --- a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql +++ b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory_long.sql @@ -1,7 +1,7 @@ drop table if exists data_01513; create table data_01513 (key String) engine=MergeTree() order by key; -- 10e3 groups, 1e3 keys each -insert into data_01513 select number%10e3 from numbers(toUInt64(2e6)); +insert into data_01513 select number%10e3 from numbers(2e6); -- reduce number of parts to 1 optimize table data_01513 final; diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference index 4c85a1d418a..a3f2106cd5f 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.reference @@ -1,6 +1,9 @@ -2000-01-01 00:00:00 0 -2020-01-01 00:00:00 0 -2000-01-01 00:00:00 1 -2020-01-01 00:00:00 1 -2000-01-01 00:00:00 2 -2020-01-01 00:00:00 2 +2000-01-01 00:00:00 0 +2020-01-01 00:00:00 0 +2000-01-01 00:00:00 1 +2020-01-01 00:00:00 1 +2000-01-01 00:00:00 2 +2020-01-01 00:00:00 2 +1 +499999 +5 diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql index d332946605d..25c47c008bd 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql @@ -1,15 +1,40 @@ DROP TABLE IF EXISTS select_final; -CREATE TABLE select_final (t DateTime, x Int32) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY x; +SET do_not_merge_across_partitions_select_final = 1; -INSERT INTO select_final SELECT toDate('2000-01-01'), number FROM numbers(2); -INSERT INTO select_final SELECT toDate('2000-01-01'), number + 1 FROM numbers(2); +CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); -INSERT INTO select_final SELECT toDate('2020-01-01'), number FROM numbers(2); -INSERT INTO select_final SELECT toDate('2020-01-01'), number + 1 FROM numbers(2); +INSERT INTO select_final SELECT 
toDate('2000-01-01'), number, '' FROM numbers(2); +INSERT INTO select_final SELECT toDate('2000-01-01'), number + 1, '' FROM numbers(2); + +INSERT INTO select_final SELECT toDate('2020-01-01'), number, '' FROM numbers(2); +INSERT INTO select_final SELECT toDate('2020-01-01'), number + 1, '' FROM numbers(2); -SELECT * FROM select_final FINAL ORDER BY x SETTINGS do_not_merge_across_partitions_select_final = 1; +SELECT * FROM select_final FINAL ORDER BY x; + +TRUNCATE TABLE select_final; + +INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(2); +INSERT INTO select_final SELECT toDate('2000-01-01'), number, 'updated' FROM numbers(2); + +OPTIMIZE TABLE select_final FINAL; + +INSERT INTO select_final SELECT toDate('2020-01-01'), number, '' FROM numbers(2); +INSERT INTO select_final SELECT toDate('2020-01-01'), number, 'updated' FROM numbers(2); + +SELECT max(x) FROM select_final FINAL where string = 'updated'; + +TRUNCATE TABLE select_final; + +INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(500000); + +OPTIMIZE TABLE select_final FINAL; + +SELECT max(x) FROM select_final FINAL; + +SYSTEM FLUSH LOGS; + +SELECT length(thread_ids) FROM system.query_log WHERE query='SELECT max(x) FROM select_final FINAL;' AND type='QueryFinish' AND current_database = currentDatabase() ORDER BY event_time DESC LIMIT 1; DROP TABLE select_final; - diff --git a/tests/queries/0_stateless/01526_initial_query_id.sh b/tests/queries/0_stateless/01526_initial_query_id.sh index e77764ee34e..f9d739b57cd 100755 --- a/tests/queries/0_stateless/01526_initial_query_id.sh +++ b/tests/queries/0_stateless/01526_initial_query_id.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash set -ue +unset CLICKHOUSE_LOG_COMMENT + CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql b/tests/queries/0_stateless/01533_multiple_nested.sql index 6374d6fca21..38c80617334 100644 --- a/tests/queries/0_stateless/01533_multiple_nested.sql +++ b/tests/queries/0_stateless/01533_multiple_nested.sql @@ -8,7 +8,7 @@ CREATE TABLE nested col2 Nested(a UInt32, n Nested(s String, b UInt32)), col3 Nested(n1 Nested(a UInt32, b UInt32), n2 Nested(s String, t String)) ) -ENGINE = MergeTree +ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; @@ -55,7 +55,7 @@ CREATE TABLE nested id UInt32, col1 Nested(a UInt32, n Nested(s String, b UInt32)) ) -ENGINE = MergeTree +ENGINE = MergeTree ORDER BY id SETTINGS min_bytes_for_wide_part = 0; @@ -64,3 +64,5 @@ SELECT id % 10, sum(length(col1)), sumArray(arrayMap(x -> length(x), col1.n.b)) SELECT arraySum(col1.a), arrayMap(x -> x * x * 2, col1.a) FROM nested ORDER BY id LIMIT 5; SELECT untuple(arrayJoin(arrayJoin(col1.n))) FROM nested ORDER BY id LIMIT 10 OFFSET 10; + +DROP TABLE nested; diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user.reference b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.reference similarity index 100% rename from tests/queries/0_stateless/01541_max_memory_usage_for_user.reference rename to tests/queries/0_stateless/01541_max_memory_usage_for_user_long.reference diff --git a/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh similarity index 94% rename from tests/queries/0_stateless/01541_max_memory_usage_for_user.sh rename to tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh index c81bd1a6ce4..32877bfd0fe 100755 --- a/tests/queries/0_stateless/01541_max_memory_usage_for_user.sh +++ b/tests/queries/0_stateless/01541_max_memory_usage_for_user_long.sh @@ -66,4 +66,7 @@ echo 'OK' ${CLICKHOUSE_CLIENT} --query "DROP USER test_01541"; +# Reset max_memory_usage_for_user, so it will not affect other tests +${CLICKHOUSE_CLIENT} --max_memory_usage_for_user=0 -q "SELECT 1 FORMAT Null" + exit 0 diff --git a/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference b/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference index 6f71b6263c0..443b90b80a5 100644 --- a/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference +++ b/tests/queries/0_stateless/01544_fromModifiedJulianDay.reference @@ -3,6 +3,7 @@ Invocation with constant 1858-11-17 2020-11-01 \N +\N or null 2020-11-01 \N diff --git a/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql b/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql index 4e50351d191..5e682a942d5 100644 --- a/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql +++ b/tests/queries/0_stateless/01544_fromModifiedJulianDay.sql @@ -5,6 +5,7 @@ SELECT fromModifiedJulianDay(-1); SELECT fromModifiedJulianDay(0); SELECT fromModifiedJulianDay(59154); SELECT fromModifiedJulianDay(NULL); +SELECT fromModifiedJulianDay(CAST(NULL, 'Nullable(Int64)')); SELECT fromModifiedJulianDay(-678942); -- { serverError 490 } SELECT fromModifiedJulianDay(2973484); -- { serverError 490 } diff --git a/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh index d7ee2840763..8c4900043d0 100755 --- a/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh +++ b/tests/queries/0_stateless/01548_parallel_parsing_max_memory.sh @@ -4,6 +4,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" 
&& pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -yes http://foobarfoobarfoobarfoobarfoobarfoobarfoobar.com | head -c1G > 1g.csv +yes http://foobarfoobarfoobarfoobarfoobarfoobarfoobar.com | head -c1G > ${CLICKHOUSE_TMP}/1g.csv -$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=100Mi -q "select count() from file('1g.csv', 'TSV', 'URL String')" \ No newline at end of file +$CLICKHOUSE_LOCAL --stacktrace --input_format_parallel_parsing=1 --max_memory_usage=100Mi -q "select count() from file('${CLICKHOUSE_TMP}/1g.csv', 'TSV', 'URL String')" diff --git a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference index feca2cae5ea..a1a1814a581 100644 --- a/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01562_optimize_monotonous_functions_in_order_by.reference @@ -23,7 +23,7 @@ Expression (Projection) FinishSorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) SELECT timestamp, key @@ -37,7 +37,7 @@ Expression (Projection) FinishSorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) - ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) SELECT timestamp, key diff --git a/tests/queries/0_stateless/01564_test_hint_woes.reference b/tests/queries/0_stateless/01564_test_hint_woes.reference index 892ca733d7c..9ce4572eab4 100644 --- a/tests/queries/0_stateless/01564_test_hint_woes.reference +++ b/tests/queries/0_stateless/01564_test_hint_woes.reference @@ -29,3 +29,5 @@ INSERT INTO t0(c0, c1) VALUES ("1",1) ; -- { clientError 47 } INSERT INTO t0(c0, c1) VALUES ('1', 1) ; -- the return code must be zero after the final query has failed with expected error insert into values_01564 values (11); -- { serverError 469 } +drop table t0; +drop table values_01564; diff --git a/tests/queries/0_stateless/01564_test_hint_woes.sql b/tests/queries/0_stateless/01564_test_hint_woes.sql index ec2c319e8d1..fee85130b03 100644 --- a/tests/queries/0_stateless/01564_test_hint_woes.sql +++ b/tests/queries/0_stateless/01564_test_hint_woes.sql @@ -49,3 +49,6 @@ INSERT INTO t0(c0, c1) VALUES ('1', 1) ; -- the return code must be zero after the final query has failed with expected error insert into values_01564 values (11); -- { serverError 469 } + +drop table t0; +drop table values_01564; diff --git a/tests/queries/0_stateless/01576_alias_column_rewrite.reference b/tests/queries/0_stateless/01576_alias_column_rewrite.reference index 679695dd6db..334ebc7eb1f 100644 --- a/tests/queries/0_stateless/01576_alias_column_rewrite.reference +++ b/tests/queries/0_stateless/01576_alias_column_rewrite.reference @@ -35,18 +35,18 @@ Expression (Projection) Expression ((Before ORDER BY + Add table aliases)) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) Expression (Projection) Limit (preliminary LIMIT) FinishSorting Expression (Before ORDER BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) 
Union - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) optimize_aggregation_in_order Expression ((Projection + Before ORDER BY)) Aggregating @@ -58,17 +58,17 @@ Expression ((Projection + Before ORDER BY)) Expression ((Before GROUP BY + Add table aliases)) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) Expression ((Projection + Before ORDER BY)) Aggregating Expression (Before GROUP BY) SettingQuotaAndLimits (Set limits and quota after reading from storage) Union - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) - ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) + ReadFromStorage (MergeTree with order) second-index 1 1 diff --git a/tests/queries/0_stateless/01576_if_null_external_aggregation.sql b/tests/queries/0_stateless/01576_if_null_external_aggregation.sql index b9c36a9cecc..cffeb73b1ca 100644 --- a/tests/queries/0_stateless/01576_if_null_external_aggregation.sql +++ b/tests/queries/0_stateless/01576_if_null_external_aggregation.sql @@ -3,5 +3,5 @@ SET max_bytes_before_external_group_by = 200000000; SET max_memory_usage = 1500000000; SET max_threads = 12; -SELECT bitAnd(number, pow(2, 20) - 1) as k, argMaxIf(k, number % 2 = 0 ? number : Null, number > 42), uniq(number) AS u FROM numbers(1000000) GROUP BY k format Null; +SELECT bitAnd(number, toUInt64(pow(2, 20) - 1)) as k, argMaxIf(k, number % 2 = 0 ? 
number : Null, number > 42), uniq(number) AS u FROM numbers(1000000) GROUP BY k format Null; diff --git a/tests/queries/0_stateless/01586_storage_join_low_cardinality_key.sql b/tests/queries/0_stateless/01586_storage_join_low_cardinality_key.sql index 4b613b6d7ce..28507e25fd4 100644 --- a/tests/queries/0_stateless/01586_storage_join_low_cardinality_key.sql +++ b/tests/queries/0_stateless/01586_storage_join_low_cardinality_key.sql @@ -9,3 +9,5 @@ INSERT INTO low_card VALUES ( '1' ); SELECT * FROM low_card; SELECT * FROM low_card WHERE lc = '1'; SELECT CAST(lc AS String) FROM low_card; + +DROP TABLE low_card; diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index 45cb4ac3994..d2543f0db75 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -2,7 +2,7 @@ set allow_experimental_window_functions = 1; -- just something basic -select number, count() over (partition by intDiv(number, 3) order by number) from numbers(10); +select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10); 0 1 1 2 2 3 @@ -14,7 +14,7 @@ select number, count() over (partition by intDiv(number, 3) order by number) fro 8 3 9 1 -- proper calculation across blocks -select number, max(number) over (partition by intDiv(number, 3) order by number desc) from numbers(10) settings max_block_size = 2; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2; 2 2 1 2 0 2 @@ -26,9 +26,9 @@ select number, max(number) over (partition by intDiv(number, 3) order by number 6 8 9 9 -- not a window function -select number, abs(number) over (partition by toString(intDiv(number, 3))) from numbers(10); -- { serverError 63 } +select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 } -- no partition by -select number, avg(number) over (order by number) from numbers(10); +select number, avg(number) over (order by number rows unbounded preceding) from numbers(10); 0 0 1 0.5 2 1 @@ -40,7 +40,7 @@ select number, avg(number) over (order by number) from numbers(10); 8 4 9 4.5 -- no order by -select number, quantileExact(number) over (partition by intDiv(number, 3)) from numbers(10); +select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10); 0 0 1 1 2 1 @@ -52,7 +52,7 @@ select number, quantileExact(number) over (partition by intDiv(number, 3)) from 8 7 9 9 -- can add an alias after window spec -select number, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); +select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); 0 0 1 1 2 1 @@ -65,14 +65,14 @@ select number, quantileExact(number) over (partition by intDiv(number, 3)) q fro 9 9 -- can't reference it yet -- the window functions are calculated at the -- last stage of select, after all other functions. 
-select q * 10, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); -- { serverError 47 } +select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 } -- must work in WHERE if you wrap it in a subquery -select * from (select count(*) over () c from numbers(3)) where c > 0; +select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0; 1 2 3 -- should work in ORDER BY -select number, max(number) over (partition by intDiv(number, 3) order by number desc) m from numbers(10) order by m desc, number; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number; 9 9 6 8 7 8 @@ -84,14 +84,14 @@ select number, max(number) over (partition by intDiv(number, 3) order by number 1 2 2 2 -- also works in ORDER BY if you wrap it in a subquery -select * from (select count(*) over () c from numbers(3)) order by c; +select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c; 1 2 3 -- Example with window function only in ORDER BY. Here we make a rank of all -- numbers sorted descending, and then sort by this rank descending, and must get -- the ascending order. -select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc) desc; +select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc; 0 1 2 @@ -100,23 +100,23 @@ select * from (select * from numbers(5) order by rand()) order by count() over ( -- Aggregate functions as window function arguments. This query is semantically -- the same as the above one, only we replace `number` with -- `any(number) group by number` and so on. 
-select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc) desc; +select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc; 0 1 2 3 4 -- some more simple cases w/aggregate functions -select sum(any(number)) over () from numbers(1); +select sum(any(number)) over (rows unbounded preceding) from numbers(1); 0 -select sum(any(number) + 1) over () from numbers(1); +select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1); 1 -select sum(any(number + 1)) over () from numbers(1); +select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1); 1 -- different windows -- an explain test would also be helpful, but it's too immature now and I don't -- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 5) order by number) as m from numbers(31) order by number settings max_block_size = 2; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2; 0 2 1 1 2 2 2 2 3 @@ -151,7 +151,7 @@ select number, max(number) over (partition by intDiv(number, 3) order by number -- two functions over the same window -- an explain test would also be helpful, but it's too immature now and I don't -- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 3) order by number desc) as m from numbers(7) order by number settings max_block_size = 2; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2; 0 2 3 1 2 2 2 2 1 @@ -163,22 +163,26 @@ select number, max(number) over (partition by intDiv(number, 3) order by number select median(x) over (partition by x) from (select 1 x); 1 -- an empty window definition is valid as well -select groupArray(number) over () from numbers(3); +select groupArray(number) over (rows unbounded preceding) from numbers(3); [0] [0,1] [0,1,2] +select groupArray(number) over () from numbers(3); +[0,1,2] +[0,1,2] +[0,1,2] -- This one tests we properly process the window function arguments. -- Seen errors like 'column `1` not found' from count(1). -select count(1) over (), max(number + 1) over () from numbers(3); +select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3); 1 3 -- Should work in DISTINCT -select distinct sum(0) over () from numbers(2); +select distinct sum(0) over (rows unbounded preceding) from numbers(2); 0 -select distinct any(number) over () from numbers(2); +select distinct any(number) over (rows unbounded preceding) from numbers(2); 0 -- Various kinds of aliases are properly substituted into various parts of window -- function definition. 
-with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x) from numbers(7); +with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7); 0 1 0 3 0 6 @@ -192,8 +196,8 @@ select 1 window w1 as (); select sum(number) over w1, sum(number) over w2 from numbers(10) window - w1 as (), - w2 as (partition by intDiv(number, 3)) + w1 as (rows unbounded preceding), + w2 as (partition by intDiv(number, 3) rows unbounded preceding) ; 0 0 1 1 @@ -205,12 +209,14 @@ window 28 13 36 21 45 9 +-- FIXME both functions should use the same window, but they don't. Add an +-- EXPLAIN test for this. select sum(number) over w1, - sum(number) over (partition by intDiv(number, 3)) + sum(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10) window - w1 as (partition by intDiv(number, 3)) + w1 as (partition by intDiv(number, 3) rows unbounded preceding) ; 0 0 1 1 @@ -222,3 +228,768 @@ window 13 13 21 21 9 9 +-- RANGE frame +-- It's the default +select sum(number) over () from numbers(3); +3 +3 +3 +-- Try some mutually prime sizes of partition, group and block, for the number +-- of rows that is their least common multiple + 1, so that we see all the +-- interesting corner cases. +select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 5 +; +0 0 0 2 +1 0 1 3 +2 0 0 2 +3 1 1 3 +4 1 0 1 +5 1 1 3 +6 2 0 2 +7 2 1 3 +8 2 0 2 +9 3 1 3 +10 3 0 1 +11 3 1 3 +12 4 0 2 +13 4 1 3 +14 4 0 2 +15 5 1 3 +16 5 0 1 +17 5 1 3 +18 6 0 2 +19 6 1 3 +20 6 0 2 +21 7 1 3 +22 7 0 1 +23 7 1 3 +24 8 0 2 +25 8 1 3 +26 8 0 2 +27 9 1 3 +28 9 0 1 +29 9 1 3 +30 10 0 1 +select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 2 +; +0 0 0 2 +1 0 1 4 +2 0 2 5 +3 0 0 2 +4 0 1 4 +5 1 2 5 +6 1 0 2 +7 1 1 3 +8 1 2 5 +9 1 0 2 +10 2 1 3 +11 2 2 5 +12 2 0 1 +13 2 1 3 +14 2 2 5 +15 3 0 2 +16 3 1 4 +17 3 2 5 +18 3 0 2 +19 3 1 4 +20 4 2 5 +21 4 0 2 +22 4 1 3 +23 4 2 5 +24 4 0 2 +25 5 1 3 +26 5 2 5 +27 5 0 1 +28 5 1 3 +29 5 2 5 +30 6 0 1 +select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 3 +; +0 0 0 3 +1 0 1 5 +2 0 0 3 +3 0 1 5 +4 0 0 3 +5 1 1 5 +6 1 0 2 +7 1 1 5 +8 1 0 2 +9 1 1 5 +10 2 0 3 +11 2 1 5 +12 2 0 3 +13 2 1 5 +14 2 0 3 +15 3 1 5 +16 3 0 2 +17 3 1 5 +18 3 0 2 +19 3 1 5 +20 4 0 3 +21 4 1 5 +22 4 0 3 +23 4 1 5 +24 4 0 3 +25 5 1 5 +26 5 0 2 +27 5 1 5 +28 5 0 2 +29 5 1 5 +30 6 0 1 +select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 2 +; +0 0 0 1 +1 0 1 2 +2 0 2 3 +3 1 3 2 +4 1 4 3 +5 1 0 1 +6 2 1 1 +7 2 2 2 +8 2 3 3 +9 3 4 3 +10 3 0 1 +11 3 1 2 +12 4 2 1 +13 4 3 2 +14 4 4 3 +15 5 0 1 +16 5 1 2 +17 5 2 3 +18 6 3 2 +19 6 4 3 +20 6 0 1 +21 7 1 1 +22 7 2 2 +23 7 3 3 +24 8 4 3 +25 8 0 1 +26 8 1 2 +27 9 2 1 +28 9 3 2 +29 9 4 3 +30 10 0 1 +select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by 
number +settings max_block_size = 3 +; +0 0 0 1 +1 0 1 2 +2 1 2 1 +3 1 3 2 +4 2 4 2 +5 2 0 1 +6 3 1 1 +7 3 2 2 +8 4 3 1 +9 4 4 2 +10 5 0 1 +11 5 1 2 +12 6 2 1 +13 6 3 2 +14 7 4 2 +15 7 0 1 +16 8 1 1 +17 8 2 2 +18 9 3 1 +19 9 4 2 +20 10 0 1 +21 10 1 2 +22 11 2 1 +23 11 3 2 +24 12 4 2 +25 12 0 1 +26 13 1 1 +27 13 2 2 +28 14 3 1 +29 14 4 2 +30 15 0 1 +select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 5 +; +0 0 0 1 +1 0 1 2 +2 1 2 2 +3 1 0 1 +4 2 1 1 +5 2 2 2 +6 3 0 1 +7 3 1 2 +8 4 2 2 +9 4 0 1 +10 5 1 1 +11 5 2 2 +12 6 0 1 +13 6 1 2 +14 7 2 2 +15 7 0 1 +16 8 1 1 +17 8 2 2 +18 9 0 1 +19 9 1 2 +20 10 2 2 +21 10 0 1 +22 11 1 1 +23 11 2 2 +24 12 0 1 +25 12 1 2 +26 13 2 2 +27 13 0 1 +28 14 1 1 +29 14 2 2 +30 15 0 1 +-- A case where the partition end is in the current block, and the frame end +-- is triggered by the partition end. +select min(number) over (partition by p) from (select number, intDiv(number, 3) p from numbers(10)); +0 +0 +0 +3 +3 +3 +6 +6 +6 +9 +-- UNBOUNDED FOLLOWING frame end +select + min(number) over wa, min(number) over wo, + max(number) over wa, max(number) over wo +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(31)) +window + wa as (partition by p order by o + range between unbounded preceding and unbounded following), + wo as (partition by p order by o + rows between unbounded preceding and unbounded following) +settings max_block_size = 2; +0 0 2 2 +0 0 2 2 +0 0 2 2 +3 3 5 5 +3 3 5 5 +3 3 5 5 +6 6 8 8 +6 6 8 8 +6 6 8 8 +9 9 11 11 +9 9 11 11 +9 9 11 11 +12 12 14 14 +12 12 14 14 +12 12 14 14 +15 15 17 17 +15 15 17 17 +15 15 17 17 +18 18 20 20 +18 18 20 20 +18 18 20 20 +21 21 23 23 +21 21 23 23 +21 21 23 23 +24 24 26 26 +24 24 26 26 +24 24 26 26 +27 27 29 29 +27 27 29 29 +27 27 29 29 +30 30 30 30 +-- ROWS offset frame start +select number, p, + count(*) over (partition by p order by number + rows between 1 preceding and unbounded following), + count(*) over (partition by p order by number + rows between current row and unbounded following), + count(*) over (partition by p order by number + rows between 1 following and unbounded following) +from (select number, intDiv(number, 5) p from numbers(31)) +order by p, number +settings max_block_size = 2; +0 0 5 5 4 +1 0 5 4 3 +2 0 4 3 2 +3 0 3 2 1 +4 0 2 1 0 +5 1 5 5 4 +6 1 5 4 3 +7 1 4 3 2 +8 1 3 2 1 +9 1 2 1 0 +10 2 5 5 4 +11 2 5 4 3 +12 2 4 3 2 +13 2 3 2 1 +14 2 2 1 0 +15 3 5 5 4 +16 3 5 4 3 +17 3 4 3 2 +18 3 3 2 1 +19 3 2 1 0 +20 4 5 5 4 +21 4 5 4 3 +22 4 4 3 2 +23 4 3 2 1 +24 4 2 1 0 +25 5 5 5 4 +26 5 5 4 3 +27 5 4 3 2 +28 5 3 2 1 +29 5 2 1 0 +30 6 1 1 0 +-- ROWS offset frame start and end +select number, p, + count(*) over (partition by p order by number + rows between 2 preceding and 2 following) +from (select number, intDiv(number, 7) p from numbers(71)) +order by p, number +settings max_block_size = 2; +0 0 3 +1 0 4 +2 0 5 +3 0 5 +4 0 5 +5 0 4 +6 0 3 +7 1 3 +8 1 4 +9 1 5 +10 1 5 +11 1 5 +12 1 4 +13 1 3 +14 2 3 +15 2 4 +16 2 5 +17 2 5 +18 2 5 +19 2 4 +20 2 3 +21 3 3 +22 3 4 +23 3 5 +24 3 5 +25 3 5 +26 3 4 +27 3 3 +28 4 3 +29 4 4 +30 4 5 +31 4 5 +32 4 5 +33 4 4 +34 4 3 +35 5 3 +36 5 4 +37 5 5 +38 5 5 +39 5 5 +40 5 4 +41 5 3 +42 6 3 +43 6 4 +44 6 5 +45 6 5 +46 6 5 +47 6 4 +48 6 3 +49 7 3 +50 7 4 +51 7 5 +52 7 5 +53 7 5 +54 7 4 +55 7 3 +56 8 3 +57 8 4 +58 8 5 +59 8 5 +60 8 5 +61 8 4 +62 8 3 +63 9 3 +64 9 4 +65 9 5 +66 9 5 +67 9 5 +68 9 4 +69 9 3 
+70 10 1 +SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4); +1 +2 +3 +3 +-- frame boundaries that runs into the partition end +select + count() over (partition by intDiv(number, 3) + rows between 100 following and unbounded following), + count() over (partition by intDiv(number, 3) + rows between current row and 100 following) +from numbers(10); +0 3 +0 2 +0 1 +0 3 +0 2 +0 1 +0 3 +0 2 +0 1 +0 1 +-- seen a use-after-free under MSan in this query once +SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null; +-- a corner case +select count() over (); +1 +-- RANGE CURRENT ROW frame start +select number, p, o, + count(*) over (partition by p order by o + range between current row and unbounded following) +from (select number, intDiv(number, 5) p, mod(number, 3) o + from numbers(31)) +order by p, o, number +settings max_block_size = 2; +0 0 0 5 +3 0 0 5 +1 0 1 3 +4 0 1 3 +2 0 2 1 +6 1 0 5 +9 1 0 5 +7 1 1 3 +5 1 2 2 +8 1 2 2 +12 2 0 5 +10 2 1 4 +13 2 1 4 +11 2 2 2 +14 2 2 2 +15 3 0 5 +18 3 0 5 +16 3 1 3 +19 3 1 3 +17 3 2 1 +21 4 0 5 +24 4 0 5 +22 4 1 3 +20 4 2 2 +23 4 2 2 +27 5 0 5 +25 5 1 4 +28 5 1 4 +26 5 2 2 +29 5 2 2 +30 6 0 1 +select + count(*) over (rows between current row and current row), + count(*) over (range between current row and current row) +from numbers(3); +1 3 +1 3 +1 3 +-- RANGE OFFSET +-- a basic RANGE OFFSET frame +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) +window w as (order by x asc range between 1 preceding and 2 following) +order by x; +0 0 2 3 +1 0 3 4 +2 1 4 4 +3 2 5 4 +4 3 6 4 +5 4 7 4 +6 5 8 4 +7 6 9 4 +8 7 10 4 +9 8 10 3 +10 9 10 2 +-- overflow conditions +select x, min(x) over w, max(x) over w, count(x) over w +from ( + select toUInt8(if(mod(number, 2), + toInt64(255 - intDiv(number, 2)), + toInt64(intDiv(number, 2)))) x + from numbers(10) +) +window w as (order by x range between 1 preceding and 2 following) +order by x; +0 0 2 3 +1 0 3 4 +2 1 4 4 +3 2 4 3 +4 3 4 2 +251 251 253 3 +252 251 254 4 +253 252 255 4 +254 253 255 3 +255 254 255 2 +select x, min(x) over w, max(x) over w, count(x) over w +from ( + select toInt8(multiIf( + mod(number, 3) == 0, toInt64(intDiv(number, 3)), + mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)), + toInt64(-128 + intDiv(number, 3)))) x + from numbers(15) +) +window w as (order by x range between 1 preceding and 2 following) +order by x; +-128 -128 -126 3 +-127 -128 -125 4 +-126 -127 -124 4 +-125 -126 -124 3 +-124 -125 -124 2 +0 0 2 3 +1 0 3 4 +2 1 4 4 +3 2 4 3 +4 3 4 2 +123 123 125 3 +124 123 126 4 +125 124 127 4 +126 125 127 3 +127 126 127 2 +-- RANGE OFFSET ORDER BY DESC +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between 1 preceding and 2 following) +order by x +settings max_block_size = 1; +0 0 1 2 +1 0 2 3 +2 0 3 4 +3 1 4 4 +4 2 5 4 +5 3 6 4 +6 4 7 4 +7 5 8 4 +8 6 9 4 +9 7 10 4 +10 8 10 3 +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between 1 preceding and unbounded following) +order by x +settings max_block_size = 2; +0 0 1 2 +1 0 2 3 +2 0 3 4 +3 0 4 5 +4 0 5 6 +5 0 6 7 +6 0 7 8 +7 0 8 9 +8 0 9 10 +9 0 10 11 +10 0 10 11 +select x, min(x) over w, max(x) over w, count(x) over w 
from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between unbounded preceding and 2 following) +order by x +settings max_block_size = 3; +0 0 10 11 +1 0 10 11 +2 0 10 11 +3 1 10 10 +4 2 10 9 +5 3 10 8 +6 4 10 7 +7 5 10 6 +8 6 10 5 +9 7 10 4 +10 8 10 3 +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between unbounded preceding and 2 preceding) +order by x +settings max_block_size = 4; +0 2 10 9 +1 3 10 8 +2 4 10 7 +3 5 10 6 +4 6 10 5 +5 7 10 4 +6 8 10 3 +7 9 10 2 +8 10 10 1 +9 0 0 0 +10 0 0 0 +-- Check that we put windows in such an order that we can reuse the sort. +-- First, check that at least the result is correct when we have many windows +-- with different sort order. +select + number, + count(*) over (partition by p order by number), + count(*) over (partition by p order by number, o), + count(*) over (), + count(*) over (order by number), + count(*) over (order by o), + count(*) over (order by o, number), + count(*) over (order by number, o), + count(*) over (partition by p order by o, number), + count(*) over (partition by p), + count(*) over (partition by p order by o), + count(*) over (partition by p, o order by number) +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(16)) t +order by number +; +0 1 1 16 1 4 1 1 1 3 1 1 +1 2 2 16 2 7 5 2 2 3 2 1 +2 3 3 16 3 10 8 3 3 3 3 1 +3 1 1 16 4 13 11 4 2 3 2 1 +4 2 2 16 5 16 14 5 3 3 3 1 +5 3 3 16 6 4 2 6 1 3 1 1 +6 1 1 16 7 7 6 7 1 3 1 1 +7 2 2 16 8 10 9 8 2 3 2 1 +8 3 3 16 9 13 12 9 3 3 3 1 +9 1 1 16 10 16 15 10 3 3 3 1 +10 2 2 16 11 4 3 11 1 3 1 1 +11 3 3 16 12 7 7 12 2 3 2 1 +12 1 1 16 13 10 10 13 1 3 1 1 +13 2 2 16 14 13 13 14 2 3 2 1 +14 3 3 16 15 16 16 15 3 3 3 1 +15 1 1 16 16 4 4 16 1 1 1 1 +-- The EXPLAIN for the above query would be difficult to understand, so check some +-- simple cases instead. 
+explain select + count(*) over (partition by p), + count(*) over (), + count(*) over (partition by p order by o) +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(16)) t +; +Expression ((Projection + Before ORDER BY)) + Window (Window step for window \'\') + Window (Window step for window \'PARTITION BY p\') + Window (Window step for window \'PARTITION BY p ORDER BY o ASC\') + MergingSorted (Merge sorted streams for window \'PARTITION BY p ORDER BY o ASC\') + MergeSorting (Merge sorted blocks for window \'PARTITION BY p ORDER BY o ASC\') + PartialSorting (Sort each block for window \'PARTITION BY p ORDER BY o ASC\') + Expression ((Before window functions + (Projection + Before ORDER BY))) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromStorage (SystemNumbers) +explain select + count(*) over (order by o, number), + count(*) over (order by number) +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(16)) t +; +Expression ((Projection + Before ORDER BY)) + Window (Window step for window \'ORDER BY o ASC, number ASC\') + MergingSorted (Merge sorted streams for window \'ORDER BY o ASC, number ASC\') + MergeSorting (Merge sorted blocks for window \'ORDER BY o ASC, number ASC\') + PartialSorting (Sort each block for window \'ORDER BY o ASC, number ASC\') + Window (Window step for window \'ORDER BY number ASC\') + MergingSorted (Merge sorted streams for window \'ORDER BY number ASC\') + MergeSorting (Merge sorted blocks for window \'ORDER BY number ASC\') + PartialSorting (Sort each block for window \'ORDER BY number ASC\') + Expression ((Before window functions + (Projection + Before ORDER BY))) + SettingQuotaAndLimits (Set limits and quota after reading from storage) + ReadFromStorage (SystemNumbers) +-- A test case for the sort comparator found by fuzzer. 
+SELECT + max(number) OVER (ORDER BY number DESC NULLS FIRST), + max(number) OVER (ORDER BY number ASC NULLS FIRST) +FROM numbers(2) +; +1 0 +1 1 +-- some true window functions -- rank and friends +select number, p, o, + count(*) over w, + rank() over w, + dense_rank() over w, + row_number() over w +from (select number, intDiv(number, 5) p, mod(number, 3) o + from numbers(31) order by o, number) t +window w as (partition by p order by o) +order by p, o, number +settings max_block_size = 2; +0 0 0 2 1 1 1 +3 0 0 2 1 1 2 +1 0 1 4 3 2 3 +4 0 1 4 3 2 4 +2 0 2 5 5 3 5 +6 1 0 2 1 1 1 +9 1 0 2 1 1 2 +7 1 1 3 3 2 3 +5 1 2 5 4 3 4 +8 1 2 5 4 3 5 +12 2 0 1 1 1 1 +10 2 1 3 2 2 2 +13 2 1 3 2 2 3 +11 2 2 5 4 3 4 +14 2 2 5 4 3 5 +15 3 0 2 1 1 2 +18 3 0 2 1 1 1 +16 3 1 4 3 2 3 +19 3 1 4 3 2 4 +17 3 2 5 5 3 5 +21 4 0 2 1 1 1 +24 4 0 2 1 1 2 +22 4 1 3 3 2 3 +20 4 2 5 4 3 5 +23 4 2 5 4 3 4 +27 5 0 1 1 1 1 +25 5 1 3 2 2 2 +28 5 1 3 2 2 3 +26 5 2 5 4 3 4 +29 5 2 5 4 3 5 +30 6 0 1 1 1 1 +-- our replacement for lag/lead +select + anyOrNull(number) + over (order by number rows between 1 preceding and 1 preceding), + anyOrNull(number) + over (order by number rows between 1 following and 1 following) +from numbers(5); +\N 1 +0 2 +1 3 +2 4 +3 \N +-- case-insensitive SQL-standard synonyms for any and anyLast +select + number, + fIrSt_VaLue(number) over w, + lAsT_vAlUe(number) over w +from numbers(10) +window w as (order by number range between 1 preceding and 1 following) +order by number +; +0 0 1 +1 0 2 +2 1 3 +3 2 4 +4 3 5 +5 4 6 +6 5 7 +7 6 8 +8 7 9 +9 8 9 diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index 95afb9be408..03bd8371e23 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -3,77 +3,78 @@ set allow_experimental_window_functions = 1; -- just something basic -select number, count() over (partition by intDiv(number, 3) order by number) from numbers(10); +select number, count() over (partition by intDiv(number, 3) order by number rows unbounded preceding) from numbers(10); -- proper calculation across blocks -select number, max(number) over (partition by intDiv(number, 3) order by number desc) from numbers(10) settings max_block_size = 2; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) from numbers(10) settings max_block_size = 2; -- not a window function -select number, abs(number) over (partition by toString(intDiv(number, 3))) from numbers(10); -- { serverError 63 } +select number, abs(number) over (partition by toString(intDiv(number, 3)) rows unbounded preceding) from numbers(10); -- { serverError 63 } -- no partition by -select number, avg(number) over (order by number) from numbers(10); +select number, avg(number) over (order by number rows unbounded preceding) from numbers(10); -- no order by -select number, quantileExact(number) over (partition by intDiv(number, 3)) from numbers(10); +select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10); -- can add an alias after window spec -select number, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); +select number, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- can't reference it yet -- the window functions are calculated at the -- last stage of select, after all other functions. 
-select q * 10, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); -- { serverError 47 } +select q * 10, quantileExact(number) over (partition by intDiv(number, 3) rows unbounded preceding) q from numbers(10); -- { serverError 47 } -- must work in WHERE if you wrap it in a subquery -select * from (select count(*) over () c from numbers(3)) where c > 0; +select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) where c > 0; -- should work in ORDER BY -select number, max(number) over (partition by intDiv(number, 3) order by number desc) m from numbers(10) order by m desc, number; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding) m from numbers(10) order by m desc, number; -- also works in ORDER BY if you wrap it in a subquery -select * from (select count(*) over () c from numbers(3)) order by c; +select * from (select count(*) over (rows unbounded preceding) c from numbers(3)) order by c; -- Example with window function only in ORDER BY. Here we make a rank of all -- numbers sorted descending, and then sort by this rank descending, and must get -- the ascending order. -select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc) desc; +select * from (select * from numbers(5) order by rand()) order by count() over (order by number desc rows unbounded preceding) desc; -- Aggregate functions as window function arguments. This query is semantically -- the same as the above one, only we replace `number` with -- `any(number) group by number` and so on. -select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc) desc; +select * from (select * from numbers(5) order by rand()) group by number order by sum(any(number + 1)) over (order by min(number) desc rows unbounded preceding) desc; -- some more simple cases w/aggregate functions -select sum(any(number)) over () from numbers(1); -select sum(any(number) + 1) over () from numbers(1); -select sum(any(number + 1)) over () from numbers(1); +select sum(any(number)) over (rows unbounded preceding) from numbers(1); +select sum(any(number) + 1) over (rows unbounded preceding) from numbers(1); +select sum(any(number + 1)) over (rows unbounded preceding) from numbers(1); -- different windows -- an explain test would also be helpful, but it's too immature now and I don't -- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 5) order by number) as m from numbers(31) order by number settings max_block_size = 2; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by intDiv(number, 5) order by number rows unbounded preceding) as m from numbers(31) order by number settings max_block_size = 2; -- two functions over the same window -- an explain test would also be helpful, but it's too immature now and I don't -- want to change reference all the time -select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 3) order by number desc) as m from numbers(7) order by number settings max_block_size = 2; +select number, max(number) over (partition by intDiv(number, 3) order by number desc rows unbounded preceding), count(number) over (partition by 
intDiv(number, 3) order by number desc rows unbounded preceding) as m from numbers(7) order by number settings max_block_size = 2; -- check that we can work with constant columns select median(x) over (partition by x) from (select 1 x); -- an empty window definition is valid as well +select groupArray(number) over (rows unbounded preceding) from numbers(3); select groupArray(number) over () from numbers(3); -- This one tests we properly process the window function arguments. -- Seen errors like 'column `1` not found' from count(1). -select count(1) over (), max(number + 1) over () from numbers(3); +select count(1) over (rows unbounded preceding), max(number + 1) over () from numbers(3); -- Should work in DISTINCT -select distinct sum(0) over () from numbers(2); -select distinct any(number) over () from numbers(2); +select distinct sum(0) over (rows unbounded preceding) from numbers(2); +select distinct any(number) over (rows unbounded preceding) from numbers(2); -- Various kinds of aliases are properly substituted into various parts of window -- function definition. -with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x) from numbers(7); +with number + 1 as x select intDiv(number, 3) as y, sum(x + y) over (partition by y order by x rows unbounded preceding) from numbers(7); -- WINDOW clause select 1 window w1 as (); @@ -81,14 +82,266 @@ select 1 window w1 as (); select sum(number) over w1, sum(number) over w2 from numbers(10) window - w1 as (), - w2 as (partition by intDiv(number, 3)) + w1 as (rows unbounded preceding), + w2 as (partition by intDiv(number, 3) rows unbounded preceding) ; +-- FIXME both functions should use the same window, but they don't. Add an +-- EXPLAIN test for this. select sum(number) over w1, - sum(number) over (partition by intDiv(number, 3)) + sum(number) over (partition by intDiv(number, 3) rows unbounded preceding) from numbers(10) window - w1 as (partition by intDiv(number, 3)) + w1 as (partition by intDiv(number, 3) rows unbounded preceding) +; + +-- RANGE frame +-- It's the default +select sum(number) over () from numbers(3); + +-- Try some mutually prime sizes of partition, group and block, for the number +-- of rows that is their least common multiple + 1, so that we see all the +-- interesting corner cases. 
+select number, intDiv(number, 3) p, mod(number, 2) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 5 +; + +select number, intDiv(number, 5) p, mod(number, 3) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 2 +; + +select number, intDiv(number, 5) p, mod(number, 2) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 3 +; + +select number, intDiv(number, 3) p, mod(number, 5) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 2 +; + +select number, intDiv(number, 2) p, mod(number, 5) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 3 +; + +select number, intDiv(number, 2) p, mod(number, 3) o, count(number) over w as c +from numbers(31) +window w as (partition by p order by o range unbounded preceding) +order by number +settings max_block_size = 5 +; + +-- A case where the partition end is in the current block, and the frame end +-- is triggered by the partition end. +select min(number) over (partition by p) from (select number, intDiv(number, 3) p from numbers(10)); + +-- UNBOUNDED FOLLOWING frame end +select + min(number) over wa, min(number) over wo, + max(number) over wa, max(number) over wo +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(31)) +window + wa as (partition by p order by o + range between unbounded preceding and unbounded following), + wo as (partition by p order by o + rows between unbounded preceding and unbounded following) +settings max_block_size = 2; + +-- ROWS offset frame start +select number, p, + count(*) over (partition by p order by number + rows between 1 preceding and unbounded following), + count(*) over (partition by p order by number + rows between current row and unbounded following), + count(*) over (partition by p order by number + rows between 1 following and unbounded following) +from (select number, intDiv(number, 5) p from numbers(31)) +order by p, number +settings max_block_size = 2; + +-- ROWS offset frame start and end +select number, p, + count(*) over (partition by p order by number + rows between 2 preceding and 2 following) +from (select number, intDiv(number, 7) p from numbers(71)) +order by p, number +settings max_block_size = 2; + +SELECT count(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM numbers(4); + +-- frame boundaries that runs into the partition end +select + count() over (partition by intDiv(number, 3) + rows between 100 following and unbounded following), + count() over (partition by intDiv(number, 3) + rows between current row and 100 following) +from numbers(10); + +-- seen a use-after-free under MSan in this query once +SELECT number, max(number) OVER (PARTITION BY intDiv(number, 7) ORDER BY number ASC NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM numbers(1024) SETTINGS max_block_size = 2 FORMAT Null; + +-- a corner case +select count() over (); + +-- RANGE CURRENT ROW frame start +select number, p, o, + count(*) over (partition by p order by o + range between current row and unbounded following) +from (select number, intDiv(number, 5) p, 
mod(number, 3) o + from numbers(31)) +order by p, o, number +settings max_block_size = 2; + +select + count(*) over (rows between current row and current row), + count(*) over (range between current row and current row) +from numbers(3); + +-- RANGE OFFSET +-- a basic RANGE OFFSET frame +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) +window w as (order by x asc range between 1 preceding and 2 following) +order by x; + +-- overflow conditions +select x, min(x) over w, max(x) over w, count(x) over w +from ( + select toUInt8(if(mod(number, 2), + toInt64(255 - intDiv(number, 2)), + toInt64(intDiv(number, 2)))) x + from numbers(10) +) +window w as (order by x range between 1 preceding and 2 following) +order by x; + +select x, min(x) over w, max(x) over w, count(x) over w +from ( + select toInt8(multiIf( + mod(number, 3) == 0, toInt64(intDiv(number, 3)), + mod(number, 3) == 1, toInt64(127 - intDiv(number, 3)), + toInt64(-128 + intDiv(number, 3)))) x + from numbers(15) +) +window w as (order by x range between 1 preceding and 2 following) +order by x; + +-- RANGE OFFSET ORDER BY DESC +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between 1 preceding and 2 following) +order by x +settings max_block_size = 1; + +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between 1 preceding and unbounded following) +order by x +settings max_block_size = 2; + +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between unbounded preceding and 2 following) +order by x +settings max_block_size = 3; + +select x, min(x) over w, max(x) over w, count(x) over w from ( + select toUInt8(number) x from numbers(11)) t +window w as (order by x desc range between unbounded preceding and 2 preceding) +order by x +settings max_block_size = 4; + + +-- Check that we put windows in such an order that we can reuse the sort. +-- First, check that at least the result is correct when we have many windows +-- with different sort order. +select + number, + count(*) over (partition by p order by number), + count(*) over (partition by p order by number, o), + count(*) over (), + count(*) over (order by number), + count(*) over (order by o), + count(*) over (order by o, number), + count(*) over (order by number, o), + count(*) over (partition by p order by o, number), + count(*) over (partition by p), + count(*) over (partition by p order by o), + count(*) over (partition by p, o order by number) +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(16)) t +order by number +; + +-- The EXPLAIN for the above query would be difficult to understand, so check some +-- simple cases instead. +explain select + count(*) over (partition by p), + count(*) over (), + count(*) over (partition by p order by o) +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(16)) t +; + +explain select + count(*) over (order by o, number), + count(*) over (order by number) +from + (select number, intDiv(number, 3) p, mod(number, 5) o + from numbers(16)) t +; + +-- A test case for the sort comparator found by fuzzer. 
+SELECT + max(number) OVER (ORDER BY number DESC NULLS FIRST), + max(number) OVER (ORDER BY number ASC NULLS FIRST) +FROM numbers(2) +; + +-- some true window functions -- rank and friends +select number, p, o, + count(*) over w, + rank() over w, + dense_rank() over w, + row_number() over w +from (select number, intDiv(number, 5) p, mod(number, 3) o + from numbers(31) order by o, number) t +window w as (partition by p order by o) +order by p, o, number +settings max_block_size = 2; + +-- our replacement for lag/lead +select + anyOrNull(number) + over (order by number rows between 1 preceding and 1 preceding), + anyOrNull(number) + over (order by number rows between 1 following and 1 following) +from numbers(5); + +-- case-insensitive SQL-standard synonyms for any and anyLast +select + number, + fIrSt_VaLue(number) over w, + lAsT_vAlUe(number) over w +from numbers(10) +window w as (order by number range between 1 preceding and 1 following) +order by number ; diff --git a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh index 7f111538a06..6ae103bdf6e 100755 --- a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh +++ b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill.sh @@ -28,7 +28,7 @@ function kill_mutation_thread # find any mutation and kill it mutation_id=$($CLICKHOUSE_CLIENT --query "SELECT mutation_id FROM system.mutations WHERE is_done=0 and database='${CLICKHOUSE_DATABASE}' and table='concurrent_mutate_kill' LIMIT 1") if [ ! -z "$mutation_id" ]; then - $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='$mutation_id'" 1> /dev/null + $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='$mutation_id' and table='concurrent_mutate_kill' and database='${CLICKHOUSE_DATABASE}'" 1> /dev/null sleep 1 fi done diff --git a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas.sh b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas.sh index 60e2adb4204..bfa68328c06 100755 --- a/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas.sh +++ b/tests/queries/0_stateless/01593_concurrent_alter_mutations_kill_many_replicas.sh @@ -40,7 +40,7 @@ function kill_mutation_thread # find any mutation and kill it mutation_id=$($CLICKHOUSE_CLIENT --query "SELECT mutation_id FROM system.mutations WHERE is_done = 0 and table like 'concurrent_kill_%' and database='${CLICKHOUSE_DATABASE}' LIMIT 1") if [ ! 
-z "$mutation_id" ]; then - $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='$mutation_id'" 1> /dev/null + $CLICKHOUSE_CLIENT --query "KILL MUTATION WHERE mutation_id='$mutation_id' and table like 'concurrent_kill_%' and database='${CLICKHOUSE_DATABASE}'" 1> /dev/null sleep 1 fi done diff --git a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql index b33b74c918d..5de4210d3f2 100644 --- a/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql +++ b/tests/queries/0_stateless/01600_remerge_sort_lowered_memory_bytes_ratio.sql @@ -10,8 +10,8 @@ set max_block_size=40960; -- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption -- MergeSortingTransform: Memory usage is lowered from 186.25 MiB to 95.00 MiB -- MergeSortingTransform: Re-merging is not useful (memory usage was not lowered by remerge_sort_lowered_memory_bytes_ratio=2.0) -select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(toUInt64(3e6)) order by k limit 400e3 format Null; -- { serverError 241 } -select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(toUInt64(3e6)) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 } +select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 format Null; -- { serverError 241 } +select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=2. format Null; -- { serverError 241 } -- remerge_sort_lowered_memory_bytes_ratio 1.9 is good (need at least 1.91/0.98=1.94) -- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 819200 rows) to save memory consumption @@ -26,4 +26,4 @@ select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v -- MergeSortingTransform: Memory usage is lowered from 188.13 MiB to 95.00 MiB -- MergeSortingTransform: Re-merging intermediate ORDER BY data (20 blocks with 809600 rows) to save memory consumption -- MergeSortingTransform: Memory usage is lowered from 188.13 MiB to 95.00 MiB -select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(toUInt64(3e6)) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=1.9 format Null; +select number k, repeat(toString(number), 11) v1, repeat(toString(number), 12) v2 from numbers(3e6) order by k limit 400e3 settings remerge_sort_lowered_memory_bytes_ratio=1.9 format Null; diff --git a/tests/queries/0_stateless/01602_runningConcurrency.reference b/tests/queries/0_stateless/01602_runningConcurrency.reference new file mode 100644 index 00000000000..1bd238ccde8 --- /dev/null +++ b/tests/queries/0_stateless/01602_runningConcurrency.reference @@ -0,0 +1,19 @@ +Invocation with Date columns +1 +2 +3 +2 +1 +Invocation with DateTime +1 +2 +3 +2 +1 +Invocation with DateTime64 +1 +2 +3 +2 +1 +Erroneous cases diff --git a/tests/queries/0_stateless/01602_runningConcurrency.sql b/tests/queries/0_stateless/01602_runningConcurrency.sql new file mode 100644 index 00000000000..55b3aae867a --- /dev/null +++ b/tests/queries/0_stateless/01602_runningConcurrency.sql @@ -0,0 +1,51 @@ +-- +SELECT 'Invocation with Date columns'; + +DROP TABLE IF EXISTS 
runningConcurrency_test; +CREATE TABLE runningConcurrency_test(begin Date, end Date) ENGINE = Memory; + +INSERT INTO runningConcurrency_test VALUES ('2020-12-01', '2020-12-10'), ('2020-12-02', '2020-12-10'), ('2020-12-03', '2020-12-12'), ('2020-12-10', '2020-12-12'), ('2020-12-13', '2020-12-20'); +SELECT runningConcurrency(begin, end) FROM runningConcurrency_test; + +DROP TABLE runningConcurrency_test; + +-- +SELECT 'Invocation with DateTime'; + +DROP TABLE IF EXISTS runningConcurrency_test; +CREATE TABLE runningConcurrency_test(begin DateTime, end DateTime) ENGINE = Memory; + +INSERT INTO runningConcurrency_test VALUES ('2020-12-01 00:00:00', '2020-12-01 00:59:59'), ('2020-12-01 00:30:00', '2020-12-01 00:59:59'), ('2020-12-01 00:40:00', '2020-12-01 01:30:30'), ('2020-12-01 01:10:00', '2020-12-01 01:30:30'), ('2020-12-01 01:50:00', '2020-12-01 01:59:59'); +SELECT runningConcurrency(begin, end) FROM runningConcurrency_test; + +DROP TABLE runningConcurrency_test; + +-- +SELECT 'Invocation with DateTime64'; + +DROP TABLE IF EXISTS runningConcurrency_test; +CREATE TABLE runningConcurrency_test(begin DateTime64(3), end DateTime64(3)) ENGINE = Memory; + +INSERT INTO runningConcurrency_test VALUES ('2020-12-01 00:00:00.000', '2020-12-01 00:00:00.100'), ('2020-12-01 00:00:00.010', '2020-12-01 00:00:00.100'), ('2020-12-01 00:00:00.020', '2020-12-01 00:00:00.200'), ('2020-12-01 00:00:00.150', '2020-12-01 00:00:00.200'), ('2020-12-01 00:00:00.250', '2020-12-01 00:00:00.300'); +SELECT runningConcurrency(begin, end) FROM runningConcurrency_test; + +DROP TABLE runningConcurrency_test; + +-- +SELECT 'Erroneous cases'; + +-- Constant columns are currently not supported. +SELECT runningConcurrency(toDate(arrayJoin([1, 2])), toDate('2000-01-01')); -- { serverError 44 } + +-- Unsupported data types +SELECT runningConcurrency('strings are', 'not supported'); -- { serverError 43 } +SELECT runningConcurrency(NULL, NULL); -- { serverError 43 } +SELECT runningConcurrency(CAST(NULL, 'Nullable(DateTime)'), CAST(NULL, 'Nullable(DateTime)')); -- { serverError 43 } + +-- Mismatching data types +SELECT runningConcurrency(toDate('2000-01-01'), toDateTime('2000-01-01 00:00:00')); -- { serverError 43 } + +-- begin > end +SELECT runningConcurrency(toDate('2000-01-02'), toDate('2000-01-01')); -- { serverError 117 } + + diff --git a/tests/queries/0_stateless/01606_git_import.sh b/tests/queries/0_stateless/01606_git_import.sh index 16a0b92abe7..6d425c9bceb 100755 --- a/tests/queries/0_stateless/01606_git_import.sh +++ b/tests/queries/0_stateless/01606_git_import.sh @@ -6,6 +6,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Clone some not too large repository and create a database from it. 
+cd $CLICKHOUSE_TMP || exit + # Protection for network errors for _ in {1..10}; do rm -rf ./clickhouse-odbc diff --git a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference index ac91b53b754..e46fd479413 100644 --- a/tests/queries/0_stateless/01611_constant_folding_subqueries.reference +++ b/tests/queries/0_stateless/01611_constant_folding_subqueries.reference @@ -5,5 +5,7 @@ SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) FO 1,10 EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); SELECT - identity(cast(0, \'UInt64\')) AS n, + identity(CAST(0, \'UInt64\')) AS n, toUInt64(10 / n) +SELECT * FROM (WITH (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) as q SELECT * FROM system.one WHERE q > 0); +0 diff --git a/tests/queries/0_stateless/01611_constant_folding_subqueries.sql b/tests/queries/0_stateless/01611_constant_folding_subqueries.sql index abf67a8ed6a..59f057d1ec5 100644 --- a/tests/queries/0_stateless/01611_constant_folding_subqueries.sql +++ b/tests/queries/0_stateless/01611_constant_folding_subqueries.sql @@ -2,3 +2,4 @@ SELECT * FROM (SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n)) FORMAT CSV; SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) FORMAT CSV; EXPLAIN SYNTAX SELECT (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n); +SELECT * FROM (WITH (SELECT * FROM system.numbers LIMIT 1 OFFSET 1) AS n, toUInt64(10 / n) as q SELECT * FROM system.one WHERE q > 0); diff --git a/tests/queries/0_stateless/01621_clickhouse_compressor.sh b/tests/queries/0_stateless/01621_clickhouse_compressor.sh index 5292bcef52a..3157cb0e887 100755 --- a/tests/queries/0_stateless/01621_clickhouse_compressor.sh +++ b/tests/queries/0_stateless/01621_clickhouse_compressor.sh @@ -7,27 +7,18 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) set -e -TEMP_DIR="$(mktemp -d /tmp/clickhouse.test..XXXXXX)" -cd "${TEMP_DIR:?}" - -function cleanup() -{ - rm -fr "${TEMP_DIR:?}" -} -trap cleanup EXIT - # This is random garbage, so compression ratio will be very low. 
-tr -cd 'a-z0-9' < /dev/urandom | head -c1M > input +tr -cd 'a-z0-9' < /dev/urandom | head -c1M > ${CLICKHOUSE_TMP}/input # stdin/stdout streams -$CLICKHOUSE_COMPRESSOR < input > output -diff -q <($CLICKHOUSE_COMPRESSOR --decompress < output) input +$CLICKHOUSE_COMPRESSOR < ${CLICKHOUSE_TMP}/input > ${CLICKHOUSE_TMP}/output +diff -q <($CLICKHOUSE_COMPRESSOR --decompress < ${CLICKHOUSE_TMP}/output) ${CLICKHOUSE_TMP}/input # positional arguments, and that fact that input/output will be overwritten -$CLICKHOUSE_COMPRESSOR input output -diff -q <($CLICKHOUSE_COMPRESSOR --decompress output) input +$CLICKHOUSE_COMPRESSOR ${CLICKHOUSE_TMP}/input ${CLICKHOUSE_TMP}/output +diff -q <($CLICKHOUSE_COMPRESSOR --decompress ${CLICKHOUSE_TMP}/output) ${CLICKHOUSE_TMP}/input # --offset-in-decompressed-block -diff -q <($CLICKHOUSE_COMPRESSOR --decompress --offset-in-decompressed-block 10 output) <(tail -c+$((10+1)) input) +diff -q <($CLICKHOUSE_COMPRESSOR --decompress --offset-in-decompressed-block 10 ${CLICKHOUSE_TMP}/output) <(tail -c+$((10+1)) ${CLICKHOUSE_TMP}/input) # TODO: --offset-in-compressed-file using some .bin file (via clickhouse-local + check-marks) diff --git a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh index e7deace8b46..7afdbbc6b66 100755 --- a/tests/queries/0_stateless/01622_defaults_for_url_engine.sh +++ b/tests/queries/0_stateless/01622_defaults_for_url_engine.sh @@ -7,12 +7,12 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) PORT="$(($RANDOM%63000+2001))" -TEMP_FILE="$CURDIR/01622_defaults_for_url_engine.tmp" +TEMP_FILE="${CLICKHOUSE_TMP}/01622_defaults_for_url_engine.tmp" function thread1 { - while true; do - echo -e "HTTP/1.1 200 OK\n\n{\"a\": 1}" | nc -l -p $1 -q 1; + while true; do + echo -e "HTTP/1.1 200 OK\n\n{\"a\": 1}" | nc -l -p $1 -q 1; done } diff --git a/tests/queries/0_stateless/01622_multiple_ttls.reference b/tests/queries/0_stateless/01622_multiple_ttls.reference new file mode 100644 index 00000000000..d9ebb694584 --- /dev/null +++ b/tests/queries/0_stateless/01622_multiple_ttls.reference @@ -0,0 +1,22 @@ +TTL WHERE +1970-10-10 2 +1970-10-10 5 +1970-10-10 8 +2000-10-10 1 +2000-10-10 2 +2000-10-10 4 +2000-10-10 5 +2000-10-10 7 +2000-10-10 8 +TTL GROUP BY +1970-10-01 0 4950 +2000-10-01 0 450 +2000-10-01 1 460 +2000-10-01 2 470 +2000-10-01 3 480 +2000-10-01 4 490 +2000-10-01 5 500 +2000-10-01 6 510 +2000-10-01 7 520 +2000-10-01 8 530 +2000-10-01 9 540 diff --git a/tests/queries/0_stateless/01622_multiple_ttls.sql b/tests/queries/0_stateless/01622_multiple_ttls.sql new file mode 100644 index 00000000000..aa2eeb5759b --- /dev/null +++ b/tests/queries/0_stateless/01622_multiple_ttls.sql @@ -0,0 +1,44 @@ +SELECT 'TTL WHERE'; +DROP TABLE IF EXISTS ttl_where; + +CREATE TABLE ttl_where +( + `d` Date, + `i` UInt32 +) +ENGINE = MergeTree +ORDER BY tuple() +TTL d + toIntervalYear(10) DELETE WHERE i % 3 = 0, + d + toIntervalYear(40) DELETE WHERE i % 3 = 1; + +-- This test will fail at 2040-10-10 + +INSERT INTO ttl_where SELECT toDate('2000-10-10'), number FROM numbers(10); +INSERT INTO ttl_where SELECT toDate('1970-10-10'), number FROM numbers(10); +OPTIMIZE TABLE ttl_where FINAL; + +SELECT * FROM ttl_where ORDER BY d, i; + +DROP TABLE ttl_where; + +SELECT 'TTL GROUP BY'; +DROP TABLE IF EXISTS ttl_group_by; + +CREATE TABLE ttl_group_by +( + `d` Date, + `i` UInt32, + `v` UInt64 +) +ENGINE = MergeTree +ORDER BY (toStartOfMonth(d), i % 10) +TTL d + toIntervalYear(10) GROUP BY toStartOfMonth(d), i % 10 SET 
d = any(toStartOfMonth(d)), i = any(i % 10), v = sum(v), + d + toIntervalYear(40) GROUP BY toStartOfMonth(d) SET d = any(toStartOfMonth(d)), v = sum(v); + +INSERT INTO ttl_group_by SELECT toDate('2000-10-10'), number, number FROM numbers(100); +INSERT INTO ttl_group_by SELECT toDate('1970-10-10'), number, number FROM numbers(100); +OPTIMIZE TABLE ttl_group_by FINAL; + +SELECT * FROM ttl_group_by ORDER BY d, i; + +DROP TABLE ttl_group_by; diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.reference b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.sql similarity index 96% rename from tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql rename to tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.sql index f059da20755..7a92f40b3f0 100644 --- a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql +++ b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize_long.sql @@ -5,7 +5,7 @@ create table data_01641 (key Int, value String) engine=MergeTree order by (key, -- peak memory usage is 170MiB set max_memory_usage='200Mi'; system stop merges data_01641; -insert into data_01641 select number, toString(number) from numbers(toUInt64(120e6)); +insert into data_01641 select number, toString(number) from numbers(120e6); -- peak: -- - is 21MiB if background merges already scheduled diff --git a/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper.sql b/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper.sql index 50596680618..c3e459dfc49 100644 --- a/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper.sql +++ b/tests/queries/0_stateless/01650_drop_part_and_deduplication_zookeeper.sql @@ -5,7 +5,7 @@ CREATE TABLE partitioned_table ( partitioner UInt8, value String ) -ENGINE ReplicatedMergeTree('/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table', '1') +ENGINE ReplicatedMergeTree('/clickhouse/01650_drop_part_and_deduplication_partitioned_table', '1') ORDER BY key PARTITION BY partitioner; @@ -16,24 +16,24 @@ INSERT INTO partitioned_table VALUES (11, 1, 'AA'), (22, 2, 'BB'), (33, 3, 'CC') SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name; -SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table/blocks/' ORDER BY value; +SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value; INSERT INTO partitioned_table VALUES (33, 3, 'CC'); -- must be deduplicated SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name; -SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table/blocks/' ORDER BY value; +SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value; ALTER TABLE partitioned_table DROP PART '3_1_1_0'; SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY 
name; -SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table/blocks/' ORDER BY value; +SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value; INSERT INTO partitioned_table VALUES (33, 3, 'CC'); -- mustn't be deduplicated SELECT partition_id, name FROM system.parts WHERE table = 'partitioned_table' AND database = currentDatabase() ORDER BY name; -SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/test/01650_drop_part_and_deduplication/partitioned_table/blocks/' ORDER BY value; +SELECT substring(name, 1, 2), value FROM system.zookeeper WHERE path='/clickhouse/01650_drop_part_and_deduplication_partitioned_table/blocks/' ORDER BY value; DROP TABLE IF EXISTS partitioned_table; diff --git a/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql b/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql index b3d3ad81834..d664ec606b5 100644 --- a/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql +++ b/tests/queries/0_stateless/01652_ignore_and_low_cardinality.sql @@ -4,3 +4,5 @@ SELECT ignore(10, ignore(*), ignore(ignore(-2, 1025, *)), NULL, *), * FROM lc_nu SELECT ignore(toLowCardinality(1), toLowCardinality(2), 3); + +DROP TABLE lc_null_int8_defnull; diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference new file mode 100644 index 00000000000..19487c9f942 --- /dev/null +++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.reference @@ -0,0 +1,140 @@ +---------Q1---------- +2 2 2 20 +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON a = table2.a +WHERE table2.b = toUInt32(20) +---------Q2---------- +2 2 2 20 +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON a = table2.a +WHERE (table2.a < table2.b) AND (table2.b = toUInt32(20)) +---------Q3---------- +---------Q4---------- +6 40 +SELECT + a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON a = toUInt32(10 - table2.a) +WHERE (b = 6) AND (table2.b > 20) +---------Q5---------- +SELECT + a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 + WHERE 0 +) AS table2 ON a = table2.a +WHERE 0 +---------Q6---------- +---------Q7---------- +0 0 0 0 +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL INNER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON a = table2.a +WHERE (table2.b < toUInt32(40)) AND (b < 1) +---------Q8---------- +---------Q9---will not be optimized---------- +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL LEFT JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON (a = table2.a) AND (b = toUInt32(10)) +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL RIGHT JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON (a = table2.a) AND (b = toUInt32(10)) +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL FULL OUTER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON (a = table2.a) AND (b = toUInt32(10)) +SELECT + a, + b, + table2.a, + table2.b +FROM table1 +ALL FULL OUTER JOIN +( + SELECT + a, + b + FROM table2 +) AS table2 ON (a = table2.a) AND (table2.b = toUInt32(10)) +WHERE a < toUInt32(20) +SELECT + a, + 
b, + table2.a, + table2.b +FROM table1 +CROSS JOIN table2 diff --git a/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql new file mode 100644 index 00000000000..23871a9c47c --- /dev/null +++ b/tests/queries/0_stateless/01653_move_conditions_from_join_on_to_where.sql @@ -0,0 +1,48 @@ +DROP TABLE IF EXISTS table1; +DROP TABLE IF EXISTS table2; + +CREATE TABLE table1 (a UInt32, b UInt32) ENGINE = Memory; +CREATE TABLE table2 (a UInt32, b UInt32) ENGINE = Memory; + +INSERT INTO table1 SELECT number, number FROM numbers(10); +INSERT INTO table2 SELECT number * 2, number * 20 FROM numbers(6); + +SELECT '---------Q1----------'; +SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b = toUInt32(20)); +EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b = toUInt32(20)); + +SELECT '---------Q2----------'; +SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.a < table2.b) AND (table2.b = toUInt32(20)); +EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.a < table2.b) AND (table2.b = toUInt32(20)); + +SELECT '---------Q3----------'; +SELECT * FROM table1 JOIN table2 ON (table1.a = toUInt32(table2.a + 5)) AND (table2.a < table1.b) AND (table2.b > toUInt32(20)); -- { serverError 48 } + +SELECT '---------Q4----------'; +SELECT table1.a, table2.b FROM table1 INNER JOIN table2 ON (table1.a = toUInt32(10 - table2.a)) AND (table1.b = 6) AND (table2.b > 20); +EXPLAIN SYNTAX SELECT table1.a, table2.b FROM table1 INNER JOIN table2 ON (table1.a = toUInt32(10 - table2.a)) AND (table1.b = 6) AND (table2.b > 20); + +SELECT '---------Q5----------'; +SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table1.b = 6) AND (table2.b > 20) AND (10 < 6); +EXPLAIN SYNTAX SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table1.b = 6) AND (table2.b > 20) AND (10 < 6); + +SELECT '---------Q6----------'; +SELECT table1.a, table2.b FROM table1 JOIN table2 ON (table1.b = 6) AND (table2.b > 20); -- { serverError 403 } + +SELECT '---------Q7----------'; +SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b < 1; +EXPLAIN SYNTAX SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b < 1; +SELECT * FROM table1 JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(40)) where table1.b > 10; + +SELECT '---------Q8----------'; +SELECT * FROM table1 INNER JOIN table2 ON (table1.a = table2.a) AND (table2.b < toUInt32(table1, 10)); -- { serverError 47 } + +SELECT '---------Q9---will not be optimized----------'; +EXPLAIN SYNTAX SELECT * FROM table1 LEFT JOIN table2 ON (table1.a = table2.a) AND (table1.b = toUInt32(10)); +EXPLAIN SYNTAX SELECT * FROM table1 RIGHT JOIN table2 ON (table1.a = table2.a) AND (table1.b = toUInt32(10)); +EXPLAIN SYNTAX SELECT * FROM table1 FULL JOIN table2 ON (table1.a = table2.a) AND (table1.b = toUInt32(10)); +EXPLAIN SYNTAX SELECT * FROM table1 FULL JOIN table2 ON (table1.a = table2.a) AND (table2.b = toUInt32(10)) WHERE table1.a < toUInt32(20); +EXPLAIN SYNTAX SELECT * FROM table1 , table2; + +DROP TABLE table1; +DROP TABLE table2; diff --git a/tests/queries/0_stateless/01656_test_query_log_factories_info.reference b/tests/queries/0_stateless/01656_test_query_log_factories_info.reference index e12ee221a7b..c80d31a343a 100644 --- 
a/tests/queries/0_stateless/01656_test_query_log_factories_info.reference +++ b/tests/queries/0_stateless/01656_test_query_log_factories_info.reference @@ -11,7 +11,7 @@ arraySort(used_table_functions) ['numbers'] arraySort(used_functions) -['addDays','array','arrayFlatten','cast','crc32','modulo','plus','pow','round','substring','tanh','toDate','toDayOfYear','toTypeName','toWeek'] +['CAST', 'addDays','array','arrayFlatten','crc32','modulo','plus','pow','round','substring','tanh','toDate','toDayOfYear','toTypeName','toWeek'] arraySort(used_data_type_families) ['Array','Int32','Nullable','String'] diff --git a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql index b584f2c38c8..3a890ce16f9 100644 --- a/tests/queries/0_stateless/01656_test_query_log_factories_info.sql +++ b/tests/queries/0_stateless/01656_test_query_log_factories_info.sql @@ -1,3 +1,5 @@ +SET database_atomic_wait_for_drop_and_detach_synchronously=1; + SELECT uniqArray([1, 1, 2]), SUBSTRING('Hello, world', 7, 5), POW(1, 2), ROUND(TANh(1)), CrC32(''), @@ -58,3 +60,5 @@ WHERE current_database = currentDatabase() AND type == 'QueryFinish' AND (query ORDER BY query_start_time DESC LIMIT 1 FORMAT TabSeparatedWithNames; SELECT ''; +DROP TABLE test_query_log_factories_info1.memory_table; +DROP DATABASE test_query_log_factories_info1; diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference new file mode 100644 index 00000000000..87659c32e39 --- /dev/null +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.reference @@ -0,0 +1,25 @@ +aaaaaaaaa bbbbbbbbb +:0 +:0 +:0 +ccccccccc aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +:0 +aaaaaaaaa +bbbbbbbbb +ccccccccc +:107 +:79 +:35 +:35 +:35 +699415 +aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +ccccccccc aaaaaaaaa bbbbbbbbb +699415 0 +:0 +:107 +:79 diff --git a/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh new file mode 100755 index 00000000000..593f0e59ea7 --- /dev/null +++ b/tests/queries/0_stateless/01658_read_file_to_stringcolumn.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +set -eu + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Data preparation. +# Now we can get the user_files_path by use the table file function for trick. also we can get it by query as: +# "insert into function file('exist.txt', 'CSV', 'val1 char') values ('aaaa'); select _path from file('exist.txt', 'CSV', 'val1 char')" +user_files_path=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 |grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +mkdir -p ${user_files_path}/ +echo -n aaaaaaaaa > ${user_files_path}/a.txt +echo -n bbbbbbbbb > ${user_files_path}/b.txt +echo -n ccccccccc > ${user_files_path}/c.txt +echo -n ccccccccc > /tmp/c.txt +mkdir -p ${user_files_path}/dir + + +### 1st TEST in CLIENT mode. +${CLICKHOUSE_CLIENT} --query "drop table if exists data;" +${CLICKHOUSE_CLIENT} --query "create table data (A String, B String) engine=MergeTree() order by A;" + + +# Valid cases: +${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$? 
+${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$? +${CLICKHOUSE_CLIENT} --query "insert into data select file('${user_files_path}/a.txt'), file('${user_files_path}/b.txt');";echo ":"$? +${CLICKHOUSE_CLIENT} --query "select file('${user_files_path}/c.txt'), * from data";echo ":"$? +${CLICKHOUSE_CLIENT} --multiquery --query " + create table filenames(name String) engine=MergeTree() order by tuple(); + insert into filenames values ('a.txt'), ('b.txt'), ('c.txt'); + select file(name) from filenames format TSV; + drop table if exists filenames; +" + +# Invalid cases: (Here using sub-shell to catch exception avoiding the test quit) +# Test non-exists file +echo "clickhouse-client --query "'"select file('"'nonexist.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null +# Test isDir +echo "clickhouse-client --query "'"select file('"'${user_files_path}/dir'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null +# Test path out of the user_files directory. It's not allowed in client mode +echo "clickhouse-client --query "'"select file('"'/tmp/c.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null + +# Test relative path consists of ".." whose absolute path is out of the user_files directory. +echo "clickhouse-client --query "'"select file('"'${user_files_path}/../../../../tmp/c.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null +echo "clickhouse-client --query "'"select file('"'../../../../a.txt'), file('${user_files_path}/b.txt')"'";echo :$?' | bash 2>/dev/null + + +### 2nd TEST in LOCAL mode. + +echo -n aaaaaaaaa > a.txt +echo -n bbbbbbbbb > b.txt +echo -n ccccccccc > c.txt +mkdir -p dir +#Test for large files, with length : 699415 +c_count=$(wc -c ${CURDIR}/01518_nullable_aggregate_states2.reference | awk '{print $1}') +echo $c_count + +# Valid cases: +# The default dir is the CWD path in LOCAL mode +${CLICKHOUSE_LOCAL} --query " + drop table if exists data; + create table data (A String, B String) engine=MergeTree() order by A; + select file('a.txt'), file('b.txt'); + insert into data select file('a.txt'), file('b.txt'); + insert into data select file('a.txt'), file('b.txt'); + select file('c.txt'), * from data; + select file('/tmp/c.txt'), * from data; + select $c_count, $c_count -length(file('${CURDIR}/01518_nullable_aggregate_states2.reference')) +" +echo ":"$? + + +# Invalid cases: (Here using sub-shell to catch exception avoiding the test quit) +# Test non-exists file +echo "clickhouse-local --query "'"select file('"'nonexist.txt'), file('b.txt')"'";echo :$?' | bash 2>/dev/null + +# Test isDir +echo "clickhouse-local --query "'"select file('"'dir'), file('b.txt')"'";echo :$?' 
| bash 2>/dev/null + +# Restore +rm -rf a.txt b.txt c.txt dir +rm -rf ${user_files_path}/a.txt +rm -rf ${user_files_path}/b.txt +rm -rf ${user_files_path}/c.txt +rm -rf /tmp/c.txt +rm -rf ${user_files_path}/dir diff --git a/tests/queries/0_stateless/01665_merge_tree_min_for_concurrent_read.sql b/tests/queries/0_stateless/01665_merge_tree_min_for_concurrent_read.sql index ca324acdce3..e233f1e6169 100644 --- a/tests/queries/0_stateless/01665_merge_tree_min_for_concurrent_read.sql +++ b/tests/queries/0_stateless/01665_merge_tree_min_for_concurrent_read.sql @@ -4,3 +4,5 @@ INSERT INTO data_01655 VALUES (1); SELECT * FROM data_01655 SETTINGS merge_tree_min_rows_for_concurrent_read=0, merge_tree_min_bytes_for_concurrent_read=0; -- UINT64_MAX SELECT * FROM data_01655 SETTINGS merge_tree_min_rows_for_concurrent_read=18446744073709551615, merge_tree_min_bytes_for_concurrent_read=18446744073709551615; + +DROP TABLE data_01655; diff --git a/tests/queries/0_stateless/01666_gcd_ubsan.reference b/tests/queries/0_stateless/01666_gcd_ubsan.reference index 2500ef1deae..af041bdcbde 100644 --- a/tests/queries/0_stateless/01666_gcd_ubsan.reference +++ b/tests/queries/0_stateless/01666_gcd_ubsan.reference @@ -4,7 +4,7 @@ SELECT gcd(9223372036854775808, -9223372036854775807); -- { serverError 407 } SELECT gcd(-9223372036854775808, 9223372036854775807); -- { serverError 407 } SELECT gcd(-9223372036854775807, 9223372036854775808); -- { serverError 407 } SELECT gcd(9223372036854775808, -1); -- { serverError 407 } -SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 } +SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 43 } SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 } SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 } SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720')); diff --git a/tests/queries/0_stateless/01666_gcd_ubsan.sql b/tests/queries/0_stateless/01666_gcd_ubsan.sql index bde2b624cc0..a1f501cda0b 100644 --- a/tests/queries/0_stateless/01666_gcd_ubsan.sql +++ b/tests/queries/0_stateless/01666_gcd_ubsan.sql @@ -4,7 +4,7 @@ SELECT gcd(9223372036854775808, -9223372036854775807); -- { serverError 407 } SELECT gcd(-9223372036854775808, 9223372036854775807); -- { serverError 407 } SELECT gcd(-9223372036854775807, 9223372036854775808); -- { serverError 407 } SELECT gcd(9223372036854775808, -1); -- { serverError 407 } -SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 } +SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 43 } SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 } SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 } SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720')); diff --git a/tests/queries/0_stateless/01666_lcm_ubsan.reference b/tests/queries/0_stateless/01666_lcm_ubsan.reference index ed9a6aed42b..d8ed13191d3 100644 --- 
a/tests/queries/0_stateless/01666_lcm_ubsan.reference +++ b/tests/queries/0_stateless/01666_lcm_ubsan.reference @@ -4,7 +4,7 @@ SELECT lcm(9223372036854775808, -9223372036854775807); -- { serverError 407 } SELECT lcm(-9223372036854775808, 9223372036854775807); -- { serverError 407 } SELECT lcm(-9223372036854775807, 9223372036854775808); -- { serverError 407 } SELECT lcm(9223372036854775808, -1); -- { serverError 407 } -SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 } +SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 43 } SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 } SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 } SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720')); diff --git a/tests/queries/0_stateless/01666_lcm_ubsan.sql b/tests/queries/0_stateless/01666_lcm_ubsan.sql index 5cc3546e941..b3b869c80ed 100644 --- a/tests/queries/0_stateless/01666_lcm_ubsan.sql +++ b/tests/queries/0_stateless/01666_lcm_ubsan.sql @@ -4,7 +4,7 @@ SELECT lcm(9223372036854775808, -9223372036854775807); -- { serverError 407 } SELECT lcm(-9223372036854775808, 9223372036854775807); -- { serverError 407 } SELECT lcm(-9223372036854775807, 9223372036854775808); -- { serverError 407 } SELECT lcm(9223372036854775808, -1); -- { serverError 407 } -SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 48 } +SELECT lcm(-170141183460469231731687303715884105728, -170141183460469231731687303715884105728); -- { serverError 43 } SELECT lcm(toInt128(-170141183460469231731687303715884105728), toInt128(-170141183460469231731687303715884105728)); -- { serverError 407 } SELECT lcm(toInt128(-170141183460469231731687303715884105720), toInt128(-170141183460469231731687303715884105720)); -- { serverError 407 } SELECT lcm(toInt128('-170141183460469231731687303715884105720'), toInt128('-170141183460469231731687303715884105720')); diff --git a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference new file mode 100644 index 00000000000..a08a20dc95d --- /dev/null +++ b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.reference @@ -0,0 +1,15 @@ +Spin up a long running query +Check if another query with some marks to read is throttled +yes +Check if another query with less marks to read is passed +0 100 +Modify min_marks_to_honor_max_concurrent_queries to 1 +Check if another query with less marks to read is throttled +yes +Modify max_concurrent_queries to 2 +Check if another query is passed +0 100 +Modify max_concurrent_queries back to 1 +Check if another query with less marks to read is throttled +yes +finished long_running_query default select sleepEachRow(0.01) from simple settings max_block_size = 1 format Null diff --git a/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh new file mode 100755 index 00000000000..e32a83c9560 --- /dev/null +++ b/tests/queries/0_stateless/01666_merge_tree_max_query_limit.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +function wait_for_query_to_start() +{ + while [[ $($CLICKHOUSE_CURL -sS "$CLICKHOUSE_URL" -d "SELECT sum(read_rows) FROM system.processes WHERE query_id = '$1'") == 0 ]]; do sleep 0.1; done +} + +${CLICKHOUSE_CLIENT} --multiline --multiquery --query " +drop table if exists simple; + +create table simple (i int, j int) engine = MergeTree order by i +settings index_granularity = 1, max_concurrent_queries = 1, min_marks_to_honor_max_concurrent_queries = 2; + +insert into simple select number, number + 100 from numbers(1000); +" + +echo "Spin up a long running query" +${CLICKHOUSE_CLIENT} --query "select sleepEachRow(0.01) from simple settings max_block_size = 1 format Null" --query_id "long_running_query" > /dev/null 2>&1 & +wait_for_query_to_start 'long_running_query' + +# query which reads marks >= min_marks_to_honor_max_concurrent_queries is throttled +echo "Check if another query with some marks to read is throttled" +${CLICKHOUSE_CLIENT} --query "select * from simple" 2> /dev/null; +CODE=$? +[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1; +echo "yes" + +# query which reads marks less than min_marks_to_honor_max_concurrent_queries is allowed +echo "Check if another query with less marks to read is passed" +${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" + +# We can modify the settings to take effect for future queries +echo "Modify min_marks_to_honor_max_concurrent_queries to 1" +${CLICKHOUSE_CLIENT} --query "alter table simple modify setting min_marks_to_honor_max_concurrent_queries = 1" + +# Now smaller queries are also throttled +echo "Check if another query with less marks to read is throttled" +${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" 2> /dev/null; +CODE=$? +[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1; +echo "yes" + +echo "Modify max_concurrent_queries to 2" +${CLICKHOUSE_CLIENT} --query "alter table simple modify setting max_concurrent_queries = 2" + +# Now more queries are accepted +echo "Check if another query is passed" +${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" + +echo "Modify max_concurrent_queries back to 1" +${CLICKHOUSE_CLIENT} --query "alter table simple modify setting max_concurrent_queries = 1" + +# Now queries are throttled again +echo "Check if another query with less marks to read is throttled" +${CLICKHOUSE_CLIENT} --query "select * from simple where i = 0" 2> /dev/null; +CODE=$? 
+[ "$CODE" -ne "202" ] && echo "Expected error code: 202 but got: $CODE" && exit 1; +echo "yes" + +${CLICKHOUSE_CLIENT} --query "KILL QUERY WHERE query_id = 'long_running_query' SYNC" +wait + +${CLICKHOUSE_CLIENT} --multiline --multiquery --query " +drop table simple +" diff --git a/tests/queries/0_stateless/01669_columns_declaration_serde.sql b/tests/queries/0_stateless/01669_columns_declaration_serde.sql index 8e3354d63cd..a6bf1184e9f 100644 --- a/tests/queries/0_stateless/01669_columns_declaration_serde.sql +++ b/tests/queries/0_stateless/01669_columns_declaration_serde.sql @@ -22,12 +22,12 @@ DROP TABLE IF EXISTS test_r1; DROP TABLE IF EXISTS test_r2; CREATE TABLE test_r1 (x UInt64, "\\" String DEFAULT '\r\n\t\\' || ' -') ENGINE = ReplicatedMergeTree('/clickhouse/test', 'r1') ORDER BY "\\"; +') ENGINE = ReplicatedMergeTree('/clickhouse/test_01669', 'r1') ORDER BY "\\"; INSERT INTO test_r1 ("\\") VALUES ('\\'); CREATE TABLE test_r2 (x UInt64, "\\" String DEFAULT '\r\n\t\\' || ' -') ENGINE = ReplicatedMergeTree('/clickhouse/test', 'r2') ORDER BY "\\"; +') ENGINE = ReplicatedMergeTree('/clickhouse/test_01669', 'r2') ORDER BY "\\"; SYSTEM SYNC REPLICA test_r2; diff --git a/tests/queries/0_stateless/01671_ddl_hang_timeout.reference b/tests/queries/0_stateless/01671_ddl_hang_timeout.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01671_ddl_hang_timeout.sh b/tests/queries/0_stateless/01671_ddl_hang_timeout.sh new file mode 100755 index 00000000000..2ca97e3978b --- /dev/null +++ b/tests/queries/0_stateless/01671_ddl_hang_timeout.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function thread_create_drop_table { + while true; do + REPLICA=$(($RANDOM % 10)) + $CLICKHOUSE_CLIENT --query "CREATE TABLE IF NOT EXISTS t1 (x UInt64, s Array(Nullable(String))) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01671/test_01671', 'r_$REPLICA') order by x" 2>/dev/null + sleep 0.0$RANDOM + $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t1" + done +} + +function thread_alter_table { + while true; do + $CLICKHOUSE_CLIENT --query "ALTER TABLE $CLICKHOUSE_DATABASE.t1 on cluster test_shard_localhost ADD COLUMN newcol UInt32" >/dev/null 2>&1 + sleep 0.0$RANDOM + done +} + +export -f thread_create_drop_table +export -f thread_alter_table +timeout 20 bash -c "thread_create_drop_table" & +timeout 20 bash -c 'thread_alter_table' & +wait +sleep 1 + +$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS t1"; diff --git a/tests/queries/0_stateless/01671_merge_join_and_constants.reference b/tests/queries/0_stateless/01671_merge_join_and_constants.reference index 114fc9ff91f..efd814df893 100644 --- a/tests/queries/0_stateless/01671_merge_join_and_constants.reference +++ b/tests/queries/0_stateless/01671_merge_join_and_constants.reference @@ -3,3 +3,4 @@ │ b │ 2018-01-01 │ B │ 2018-01-01 │ 0.10 │ │ c │ 2018-01-01 │ C │ 2018-01-01 │ 0.10 │ └───┴────────────┴───┴────────────┴───────────┘ +\N \N \N \N 0 0 diff --git a/tests/queries/0_stateless/01671_merge_join_and_constants.sql b/tests/queries/0_stateless/01671_merge_join_and_constants.sql index 8611a96a723..c34f8e6705d 100644 --- a/tests/queries/0_stateless/01671_merge_join_and_constants.sql +++ b/tests/queries/0_stateless/01671_merge_join_and_constants.sql @@ -11,5 +11,7 @@ set join_algorithm = 'partial_merge'; SELECT * FROM table1 AS t1 ALL LEFT JOIN (SELECT *, '0.10', c, d AS b FROM table2) AS t2 
USING (a, b) ORDER BY d ASC FORMAT PrettyCompact settings max_rows_in_join = 1; +SELECT pow('0.0000000257', NULL), pow(pow(NULL, NULL), NULL) - NULL, (val + NULL) = (rval * 0), * FROM (SELECT (val + 256) = (NULL * NULL), toLowCardinality(toNullable(dummy)) AS val FROM system.one) AS s1 ANY LEFT JOIN (SELECT toLowCardinality(dummy) AS rval FROM system.one) AS s2 ON (val + 0) = (rval * 255) settings max_rows_in_join = 1; + DROP TABLE IF EXISTS table1; DROP TABLE IF EXISTS table2; diff --git a/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.reference b/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.reference new file mode 100644 index 00000000000..9daeafb9864 --- /dev/null +++ b/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.reference @@ -0,0 +1 @@ +test diff --git a/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.sh b/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.sh new file mode 100755 index 00000000000..ee75f675eb3 --- /dev/null +++ b/tests/queries/0_stateless/01674_clickhouse_client_query_param_cte.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT --param_paramName="test" -q "WITH subquery AS (SELECT {paramName:String}) SELECT * FROM subquery" diff --git a/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.reference b/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.reference new file mode 100644 index 00000000000..0777c3c2625 --- /dev/null +++ b/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.reference @@ -0,0 +1,4 @@ +Value +Value +Value +Value diff --git a/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.sql b/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.sql new file mode 100644 index 00000000000..c98cb0a5e0f --- /dev/null +++ b/tests/queries/0_stateless/01674_executable_dictionary_implicit_key.sql @@ -0,0 +1,5 @@ +SELECT dictGet('simple_executable_cache_dictionary_no_implicit_key', 'value', toUInt64(1)); +SELECT dictGet('simple_executable_cache_dictionary_implicit_key', 'value', toUInt64(1)); + +SELECT dictGet('complex_executable_cache_dictionary_no_implicit_key', 'value', (toUInt64(1), 'FirstKey')); +SELECT dictGet('complex_executable_cache_dictionary_implicit_key', 'value', (toUInt64(1), 'FirstKey')); diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.reference b/tests/queries/0_stateless/01674_filter_by_uint8.reference new file mode 100644 index 00000000000..6b522898280 --- /dev/null +++ b/tests/queries/0_stateless/01674_filter_by_uint8.reference @@ -0,0 +1,8 @@ +0 +0 +255 +1 ['foo','bar'] 1 1 +2 ['foo','bar'] 2 1 +3 ['foo','bar'] 3 1 +4 ['foo','bar'] 4 1 +5 ['foo','bar'] 5 1 diff --git a/tests/queries/0_stateless/01674_filter_by_uint8.sql b/tests/queries/0_stateless/01674_filter_by_uint8.sql new file mode 100644 index 00000000000..960153d9c5a --- /dev/null +++ b/tests/queries/0_stateless/01674_filter_by_uint8.sql @@ -0,0 +1,14 @@ +-- ORDER BY is to trigger comparison at uninitialized memory after bad filtering. 
+SELECT ignore(number) FROM numbers(256) ORDER BY arrayFilter(x -> materialize(255), materialize([257])) LIMIT 1; +SELECT ignore(number) FROM numbers(256) ORDER BY arrayFilter(x -> materialize(255), materialize(['257'])) LIMIT 1; + +SELECT count() FROM numbers(256) WHERE toUInt8(number); + +DROP TABLE IF EXISTS t_filter; +CREATE TABLE t_filter(s String, a Array(FixedString(3)), u UInt64, f UInt8) +ENGINE = MergeTree ORDER BY u; + +INSERT INTO t_filter SELECT toString(number), ['foo', 'bar'], number, toUInt8(number) FROM numbers(1000); +SELECT * FROM t_filter WHERE f LIMIT 5; + +DROP TABLE IF EXISTS t_filter; diff --git a/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference new file mode 100644 index 00000000000..63b3707b9b4 --- /dev/null +++ b/tests/queries/0_stateless/01674_htm_xml_coarse_parse.reference @@ -0,0 +1,9 @@ + + +Here is CDTATA. +This is a white space test. +This is a complex test. world '); +SELECT htmlOrXmlCoarseParse(''); +SELECT htmlOrXmlCoarseParse('This is a white space test.'); +SELECT htmlOrXmlCoarseParse('This is a complex test. Hello, world ]]>world ]]> hello\n]]>hello\n'); +DROP TABLE IF EXISTS defaults; +CREATE TABLE defaults +( + stringColumn String +) ENGINE = Memory(); + +INSERT INTO defaults values ('hello, world'), (''), (''), ('white space collapse'); + +SELECT htmlOrXmlCoarseParse(stringColumn) FROM defaults; +DROP table defaults; diff --git a/tests/queries/0_stateless/01674_unicode_asan.reference b/tests/queries/0_stateless/01674_unicode_asan.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/01674_unicode_asan.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/01674_unicode_asan.sql b/tests/queries/0_stateless/01674_unicode_asan.sql new file mode 100644 index 00000000000..2ba34b46f93 --- /dev/null +++ b/tests/queries/0_stateless/01674_unicode_asan.sql @@ -0,0 +1,3 @@ +SELECT positionCaseInsensitiveUTF8('иголка.ру', 'иголка.р\0') AS res; +SELECT positionCaseInsensitiveUTF8('иголка.ру', randomString(rand() % 100)) FROM system.numbers; -- { serverError 2 } +SELECT sum(ignore(positionCaseInsensitiveUTF8('иголка.ру', randomString(rand() % 2)))) FROM numbers(1000000); diff --git a/tests/queries/0_stateless/01674_where_prewhere_array_crash.reference b/tests/queries/0_stateless/01674_where_prewhere_array_crash.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01674_where_prewhere_array_crash.sql b/tests/queries/0_stateless/01674_where_prewhere_array_crash.sql new file mode 100644 index 00000000000..d6eef000b36 --- /dev/null +++ b/tests/queries/0_stateless/01674_where_prewhere_array_crash.sql @@ -0,0 +1,5 @@ +drop table if exists tab; +create table tab (x UInt64, `arr.a` Array(UInt64), `arr.b` Array(UInt64)) engine = MergeTree order by x; +select x from tab array join arr prewhere x != 0 where arr; -- { serverError 47; } +select x from tab array join arr prewhere arr where x != 0; -- { serverError 47; } +drop table if exists tab; diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.reference b/tests/queries/0_stateless/01675_data_type_coroutine.reference new file mode 100644 index 00000000000..7326d960397 --- /dev/null +++ b/tests/queries/0_stateless/01675_data_type_coroutine.reference @@ -0,0 +1 @@ +Ok diff --git a/tests/queries/0_stateless/01675_data_type_coroutine.sh b/tests/queries/0_stateless/01675_data_type_coroutine.sh new file mode 100755 index 
00000000000..781e43e4134 --- /dev/null +++ b/tests/queries/0_stateless/01675_data_type_coroutine.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +I=0 +while true +do + I=$((I + 1)) + TYPE=$(perl -e "print 'Array(' x $I; print 'UInt8'; print ')' x $I") + ${CLICKHOUSE_CLIENT} --max_parser_depth 1000000 --query "SELECT * FROM remote('127.0.0.{1,2}', generateRandom('x $TYPE', 1, 1, 1)) LIMIT 1 FORMAT Null" 2>&1 | grep -q -F 'Maximum parse depth' && break; +done + +#echo "I = ${I}" +echo 'Ok' diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh new file mode 100755 index 00000000000..08e07044841 --- /dev/null +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function test_completion_word() +{ + local w=$1 && shift + + local w_len=${#w} + local compword_begin=${w:0:$((w_len-3))} + local compword_end=${w:$((w_len-3))} + + # NOTE: here and below you should escape variables of the expect. + timeout 22s expect << EOF +log_user 0 +set timeout 3 +match_max 100000 +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} + +spawn bash -c "$CLICKHOUSE_CLIENT_BINARY $CLICKHOUSE_CLIENT_OPT" +expect ":) " + +# Make a query +send -- "SET $compword_begin" +expect "SET $compword_begin" + +# Wait for suggestions to load, they are loaded in background +set is_done 0 +while {\$is_done == 0} { + send -- "\\t" + expect { + "$compword_begin$compword_end" { + set is_done 1 + } + default { + sleep 1 + } + } +} + +send -- "\\3\\4" +expect eof +EOF +} + +# last 3 bytes will be completed, +# so take this in mind when you will update the list. +compwords_positive=( + # system.functions + concatAssumeInjective + # system.table_engines + ReplacingMergeTree + # system.formats + JSONEachRow + # system.table_functions + clusterAllReplicas + # system.data_type_families + SimpleAggregateFunction + # system.merge_tree_settings + write_ahead_log_interval_ms_to_fsync + # system.settings + max_concurrent_queries_for_all_users + # system.clusters + test_shard_localhost + # system.errors, also it is very rare to cover system_events_show_zero_values + CONDITIONAL_TREE_PARENT_NOT_FOUND + # system.events, also it is very rare to cover system_events_show_zero_values + WriteBufferFromFileDescriptorWriteFailed + # system.asynchronous_metrics, also this metric has zero value + # + # NOTE: that there is no ability to complete metrics like + # jemalloc.background_thread.num_runs, due to "." 
is used as a word breaker + # (and this cannot be changed -- db.table) + ReplicasMaxAbsoluteDelay + # system.metrics + PartsPreCommitted + # system.macros + default_path_test + # system.storage_policies, egh not uniq + default + # system.aggregate_function_combinators + uniqCombined64ForEach + + # FIXME: one may add separate case for suggestion_limit + # system.databases + system + # system.tables + aggregate_function_combinators + # system.columns + primary_key_bytes_in_memory_allocated + # system.dictionaries + # FIXME: none +) +for w in "${compwords_positive[@]}"; do + test_completion_word "$w" || echo "[FAIL] $w (positive)" +done + +# One negative is enough +compwords_negative=( + # system.clusters + test_shard_localhost_no_such_cluster +) +for w in "${compwords_negative[@]}"; do + test_completion_word "$w" && echo "[FAIL] $w (negative)" +done + +exit 0 diff --git a/tests/queries/0_stateless/01676_dictget_in_default_expression.reference b/tests/queries/0_stateless/01676_dictget_in_default_expression.reference new file mode 100644 index 00000000000..608f9904d26 --- /dev/null +++ b/tests/queries/0_stateless/01676_dictget_in_default_expression.reference @@ -0,0 +1,11 @@ +2 20 +3 15 +status: +LOADED +status_after_detach_and_attach: +NOT_LOADED +2 20 +3 15 +4 40 +status: +LOADED diff --git a/tests/queries/0_stateless/01676_dictget_in_default_expression.sql b/tests/queries/0_stateless/01676_dictget_in_default_expression.sql new file mode 100644 index 00000000000..cd4feaf5709 --- /dev/null +++ b/tests/queries/0_stateless/01676_dictget_in_default_expression.sql @@ -0,0 +1,31 @@ +DROP DATABASE IF EXISTS test_01676 SYNC; + +CREATE DATABASE test_01676; + +CREATE TABLE test_01676.dict_data (key UInt64, value UInt64) ENGINE=MergeTree ORDER BY tuple(); +INSERT INTO test_01676.dict_data VALUES (2,20), (3,30), (4,40), (5,50); + +CREATE DICTIONARY test_01676.dict (key UInt64, value UInt64) PRIMARY KEY key SOURCE(CLICKHOUSE(DB 'test_01676' TABLE 'dict_data' HOST '127.0.0.1' PORT tcpPort())) LIFETIME(0) LAYOUT(HASHED()); + +CREATE TABLE test_01676.table (x UInt64, y UInt64 DEFAULT dictGet('test_01676.dict', 'value', x)) ENGINE=MergeTree ORDER BY tuple(); +INSERT INTO test_01676.table (x) VALUES (2); +INSERT INTO test_01676.table VALUES (toUInt64(3), toUInt64(15)); + +SELECT * FROM test_01676.table ORDER BY x; + +SELECT 'status:'; +SELECT status FROM system.dictionaries WHERE database='test_01676' AND name='dict'; + +DETACH DATABASE test_01676; +ATTACH DATABASE test_01676; + +SELECT 'status_after_detach_and_attach:'; +SELECT status FROM system.dictionaries WHERE database='test_01676' AND name='dict'; + +INSERT INTO test_01676.table (x) VALUES (toInt64(4)); +SELECT * FROM test_01676.table ORDER BY x; + +SELECT 'status:'; +SELECT status FROM system.dictionaries WHERE database='test_01676' AND name='dict'; + +DROP DATABASE test_01676; diff --git a/tests/queries/0_stateless/01676_reinterpret_as.reference b/tests/queries/0_stateless/01676_reinterpret_as.reference new file mode 100644 index 00000000000..459ca166dc1 --- /dev/null +++ b/tests/queries/0_stateless/01676_reinterpret_as.reference @@ -0,0 +1,41 @@ +Into String +1 +Into FixedString +1 +1\0 +1\0\0 +1\0\0\0 +1 +Into Numeric Representable +Integer and Integer types +1 1 +1 1 +257 257 +257 257 +257 257 +257 257 +257 257 +257 257 +257 257 +257 257 +257 257 +Integer and Float types +1045220557 1045220557 +4596373779694328218 4596373779694328218 +0.2 1045220557 +0.2 4596373779694328218 +Integer and String types +1 1 49 +1 1 49 +11 11 12593 +Dates +1970-01-01 
1970-01-01 +1970-01-01 03:00:00 1970-01-01 03:00:00 +1970-01-01 03:00:00.000 1970-01-01 03:00:00.000 +Decimals +5.00 0.49 +5.00 0.49 +5.00 0.49 +5.00 0.49 +0.00 +ReinterpretErrors diff --git a/tests/queries/0_stateless/01676_reinterpret_as.sql b/tests/queries/0_stateless/01676_reinterpret_as.sql new file mode 100644 index 00000000000..5eb94ed0a13 --- /dev/null +++ b/tests/queries/0_stateless/01676_reinterpret_as.sql @@ -0,0 +1,42 @@ +SELECT 'Into String'; +SELECT reinterpret(49, 'String'); +SELECT 'Into FixedString'; +SELECT reinterpret(49, 'FixedString(1)'); +SELECT reinterpret(49, 'FixedString(2)'); +SELECT reinterpret(49, 'FixedString(3)'); +SELECT reinterpret(49, 'FixedString(4)'); +SELECT reinterpretAsFixedString(49); +SELECT 'Into Numeric Representable'; +SELECT 'Integer and Integer types'; +SELECT reinterpret(257, 'UInt8'), reinterpretAsUInt8(257); +SELECT reinterpret(257, 'Int8'), reinterpretAsInt8(257); +SELECT reinterpret(257, 'UInt16'), reinterpretAsUInt16(257); +SELECT reinterpret(257, 'Int16'), reinterpretAsInt16(257); +SELECT reinterpret(257, 'UInt32'), reinterpretAsUInt32(257); +SELECT reinterpret(257, 'Int32'), reinterpretAsInt32(257); +SELECT reinterpret(257, 'UInt64'), reinterpretAsUInt64(257); +SELECT reinterpret(257, 'Int64'), reinterpretAsInt64(257); +SELECT reinterpret(257, 'Int128'), reinterpretAsInt128(257); +SELECT reinterpret(257, 'UInt256'), reinterpretAsUInt256(257); +SELECT reinterpret(257, 'Int256'), reinterpretAsInt256(257); +SELECT 'Integer and Float types'; +SELECT reinterpret(toFloat32(0.2), 'UInt32'), reinterpretAsUInt32(toFloat32(0.2)); +SELECT reinterpret(toFloat64(0.2), 'UInt64'), reinterpretAsUInt64(toFloat64(0.2)); +SELECT reinterpretAsFloat32(a), reinterpretAsUInt32(toFloat32(0.2)) as a; +SELECT reinterpretAsFloat64(a), reinterpretAsUInt64(toFloat64(0.2)) as a; +SELECT 'Integer and String types'; +SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('1') as a; +SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt8('11') as a; +SELECT reinterpret(a, 'String'), reinterpretAsString(a), reinterpretAsUInt16('11') as a; +SELECT 'Dates'; +SELECT reinterpret(0, 'Date'), reinterpret('', 'Date'); +SELECT reinterpret(0, 'DateTime'), reinterpret('', 'DateTime'); +SELECT reinterpret(0, 'DateTime64'), reinterpret('', 'DateTime64'); +SELECT 'Decimals'; +SELECT reinterpret(toDecimal32(5, 2), 'Decimal32(2)'), reinterpret('1', 'Decimal32(2)'); +SELECT reinterpret(toDecimal64(5, 2), 'Decimal64(2)'), reinterpret('1', 'Decimal64(2)');; +SELECT reinterpret(toDecimal128(5, 2), 'Decimal128(2)'), reinterpret('1', 'Decimal128(2)'); +SELECT reinterpret(toDecimal256(5, 2), 'Decimal256(2)'), reinterpret('1', 'Decimal256(2)'); +SELECT reinterpret(toDateTime64(0, 0), 'Decimal64(2)'); +SELECT 'ReinterpretErrors'; +SELECT reinterpret('123', 'FixedString(1)'); -- {serverError 43} diff --git a/tests/queries/0_stateless/01676_round_int_ubsan.reference b/tests/queries/0_stateless/01676_round_int_ubsan.reference new file mode 100644 index 00000000000..cec4825deb9 --- /dev/null +++ b/tests/queries/0_stateless/01676_round_int_ubsan.reference @@ -0,0 +1 @@ +9223372036854775700 diff --git a/tests/queries/0_stateless/01676_round_int_ubsan.sql b/tests/queries/0_stateless/01676_round_int_ubsan.sql new file mode 100644 index 00000000000..45aa5706a05 --- /dev/null +++ b/tests/queries/0_stateless/01676_round_int_ubsan.sql @@ -0,0 +1,6 @@ +-- Overflow during integer rounding is implementation specific behaviour. 
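+-- For example (illustrative only): round(-9223372036854775808, -2) currently returns
+-- 9223372036854775700 (see the reference file); what matters here is only that some value
+-- is returned without tripping UBSan, not the exact wrapped result.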
+-- This test makes it apparent when the implementation changes. +-- Changing the implementation-specific behaviour is OK +-- and should not be treated as an incompatibility (simply update the test result then). + +SELECT round(-9223372036854775808, -2); diff --git a/tests/queries/0_stateless/01677_array_enumerate_bug.reference b/tests/queries/0_stateless/01677_array_enumerate_bug.reference new file mode 100644 index 00000000000..9c0e526801f --- /dev/null +++ b/tests/queries/0_stateless/01677_array_enumerate_bug.reference @@ -0,0 +1,2 @@ +[1,1,2] +[1,1,1] diff --git a/tests/queries/0_stateless/01677_array_enumerate_bug.sql b/tests/queries/0_stateless/01677_array_enumerate_bug.sql new file mode 100644 index 00000000000..0db0c51fe5b --- /dev/null +++ b/tests/queries/0_stateless/01677_array_enumerate_bug.sql @@ -0,0 +1,13 @@ +-- There was a bug: a missing check of the total size of keys in the case of a hash table with a 128-bit key. + +SELECT arrayEnumerateUniq(arrayEnumerateUniq([toInt256(10), toInt256(100), toInt256(2)]), [toInt256(123), toInt256(1023), toInt256(123)]); + +SELECT arrayEnumerateUniq( + [111111, 222222, 333333], + [444444, 555555, 666666], + [111111, 222222, 333333], + [444444, 555555, 666666], + [111111, 222222, 333333], + [444444, 555555, 666666], + [111111, 222222, 333333], + [444444, 555555, 666666]); diff --git a/tests/queries/0_stateless/01677_bit_float.reference b/tests/queries/0_stateless/01677_bit_float.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01677_bit_float.sql b/tests/queries/0_stateless/01677_bit_float.sql new file mode 100644 index 00000000000..3692d8ac6a5 --- /dev/null +++ b/tests/queries/0_stateless/01677_bit_float.sql @@ -0,0 +1,9 @@ +SELECT bitAnd(0, inf); -- { serverError 43 } +SELECT bitXor(0, inf); -- { serverError 43 } +SELECT bitOr(0, inf); -- { serverError 43 } +SELECT bitTest(inf, 0); -- { serverError 43 } +SELECT bitTest(0, inf); -- { serverError 43 } +SELECT bitRotateLeft(inf, 0); -- { serverError 43 } +SELECT bitRotateRight(inf, 0); -- { serverError 43 } +SELECT bitShiftLeft(inf, 0); -- { serverError 43 } +SELECT bitShiftRight(inf, 0); -- { serverError 43 } diff --git a/tests/queries/0_stateless/01678_great_circle_angle.reference b/tests/queries/0_stateless/01678_great_circle_angle.reference new file mode 100644 index 00000000000..0373970e3bd --- /dev/null +++ b/tests/queries/0_stateless/01678_great_circle_angle.reference @@ -0,0 +1,5 @@ +0.1224 +0.7071 +0.7135 +10007554 +10007554 diff --git a/tests/queries/0_stateless/01678_great_circle_angle.sql b/tests/queries/0_stateless/01678_great_circle_angle.sql new file mode 100644 index 00000000000..124c7bfadf2 --- /dev/null +++ b/tests/queries/0_stateless/01678_great_circle_angle.sql @@ -0,0 +1,6 @@ +SELECT round(greatCircleAngle(0, 45, 0.1, 45.1), 4); +SELECT round(greatCircleAngle(0, 45, 1, 45), 4); +SELECT round(greatCircleAngle(0, 45, 1, 45.1), 4); + +SELECT round(greatCircleDistance(0, 0, 0, 90), 4); +SELECT round(greatCircleDistance(0, 0, 90, 0), 4); diff --git a/tests/queries/0_stateless/01679_format_readable_time_delta_inf.reference b/tests/queries/0_stateless/01679_format_readable_time_delta_inf.reference new file mode 100644 index 00000000000..5446cd475b0 --- /dev/null +++ b/tests/queries/0_stateless/01679_format_readable_time_delta_inf.reference @@ -0,0 +1,3 @@ +inf +-inf +nan diff --git a/tests/queries/0_stateless/01679_format_readable_time_delta_inf.sql b/tests/queries/0_stateless/01679_format_readable_time_delta_inf.sql new file mode 100644
index 00000000000..ac92dec2bee --- /dev/null +++ b/tests/queries/0_stateless/01679_format_readable_time_delta_inf.sql @@ -0,0 +1 @@ +SELECT formatReadableTimeDelta(arrayJoin([inf, -inf, nan])); diff --git a/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.reference b/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.sh b/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.sh new file mode 100755 index 00000000000..e8d89c2e45a --- /dev/null +++ b/tests/queries/0_stateless/01679_incorrect_data_on_insert_collapsing.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS collapsing_merge_tree" + +${CLICKHOUSE_CLIENT} --query "CREATE TABLE collapsing_merge_tree (key UInt32, sign Int8, date Datetime) ENGINE=CollapsingMergeTree(sign) PARTITION BY date ORDER BY key" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO collapsing_merge_tree VALUES (1, -117, '2020-01-01')" 2>&1 | grep -q 'Incorrect data: Sign = -117' && echo 'OK' || echo 'FAIL'; + +${CLICKHOUSE_CLIENT} --query "DROP TABLE collapsing_merge_tree;" + diff --git a/tests/queries/0_stateless/01680_date_time_add_ubsan.reference b/tests/queries/0_stateless/01680_date_time_add_ubsan.reference new file mode 100644 index 00000000000..dec7d2fabd2 --- /dev/null +++ b/tests/queries/0_stateless/01680_date_time_add_ubsan.reference @@ -0,0 +1 @@ +\N diff --git a/tests/queries/0_stateless/01680_date_time_add_ubsan.sql b/tests/queries/0_stateless/01680_date_time_add_ubsan.sql new file mode 100644 index 00000000000..f4690116e1a --- /dev/null +++ b/tests/queries/0_stateless/01680_date_time_add_ubsan.sql @@ -0,0 +1,2 @@ +SELECT DISTINCT result FROM (SELECT toStartOfFifteenMinutes(toDateTime(toStartOfFifteenMinutes(toDateTime(1000.0001220703125) + (number * 65536))) + (number * 9223372036854775807)) AS result FROM system.numbers LIMIT 1048576) ORDER BY result DESC NULLS FIRST FORMAT Null; +SELECT round(round(round(round(round(100)), round(round(round(round(NULL), round(65535)), toTypeName(now() + 9223372036854775807) LIKE 'DateTime%DateTime%DateTime%DateTime%', round(-2)), 255), round(NULL)))); diff --git a/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.reference b/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.sql b/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.sql new file mode 100644 index 00000000000..181e7109cd4 --- /dev/null +++ b/tests/queries/0_stateless/01680_predicate_pushdown_union_distinct_subquery.sql @@ -0,0 +1 @@ +SELECT count() FROM (SELECT 2000 AS d_year UNION DISTINCT SELECT 2000 AS d_year) WHERE d_year = 2002 diff --git a/tests/queries/0_stateless/01681_arg_min_max_if_fix.reference b/tests/queries/0_stateless/01681_arg_min_max_if_fix.reference new file mode 100644 index 00000000000..75a0b4104b3 --- /dev/null +++ 
b/tests/queries/0_stateless/01681_arg_min_max_if_fix.reference @@ -0,0 +1 @@ +0 0 2 diff --git a/tests/queries/0_stateless/01681_arg_min_max_if_fix.sql b/tests/queries/0_stateless/01681_arg_min_max_if_fix.sql new file mode 100644 index 00000000000..b0aab898536 --- /dev/null +++ b/tests/queries/0_stateless/01681_arg_min_max_if_fix.sql @@ -0,0 +1 @@ +SELECT bitAnd(number, toUInt64(pow(257, 20) - 1048576)) AS k, argMaxIf(k, if((number % 255) = 256, toInt256(65535), number), number > 42), uniq(number) AS u FROM numbers(2) GROUP BY toInt256(-2, NULL), k; diff --git a/tests/queries/0_stateless/01681_bloom_filter_nullable_column.reference b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.reference new file mode 100644 index 00000000000..2616e6c2a5c --- /dev/null +++ b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.reference @@ -0,0 +1,10 @@ +NullableTuple with transform_null_in=0 +NullableTuple with transform_null_in=1 +NullableColumnFromCast with transform_null_in=0 +1 test +NullableColumnFromCast with transform_null_in=1 +1 test +NullableColumnFromTable with transform_null_in=0 +1 test +NullableColumnFromTable with transform_null_in=1 +1 test diff --git a/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql new file mode 100644 index 00000000000..4af1f74fca6 --- /dev/null +++ b/tests/queries/0_stateless/01681_bloom_filter_nullable_column.sql @@ -0,0 +1,51 @@ +DROP TABLE IF EXISTS bloom_filter_nullable_index; +CREATE TABLE bloom_filter_nullable_index + ( + order_key UInt64, + str Nullable(String), + + INDEX idx (str) TYPE bloom_filter GRANULARITY 1 + ) + ENGINE = MergeTree() + ORDER BY order_key SETTINGS index_granularity = 6; + +INSERT INTO bloom_filter_nullable_index VALUES (1, 'test'); +INSERT INTO bloom_filter_nullable_index VALUES (2, 'test2'); + +SELECT 'NullableTuple with transform_null_in=0'; +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT '1048576', str FROM bloom_filter_nullable_index) SETTINGS transform_null_in = 0; +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT '1048576', str FROM bloom_filter_nullable_index) SETTINGS transform_null_in = 0; + +SELECT 'NullableTuple with transform_null_in=1'; + +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT '1048576', str FROM bloom_filter_nullable_index) SETTINGS transform_null_in = 1; -- { serverError 20 } + +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT '1048576', str FROM bloom_filter_nullable_index) SETTINGS transform_null_in = 1; -- { serverError 20 } + + +SELECT 'NullableColumnFromCast with transform_null_in=0'; +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT cast('test', 'Nullable(String)')) SETTINGS transform_null_in = 0; + +SELECT 'NullableColumnFromCast with transform_null_in=1'; +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT cast('test', 'Nullable(String)')) SETTINGS transform_null_in = 1; + +DROP TABLE IF EXISTS nullable_string_value; +CREATE TABLE nullable_string_value (value Nullable(String)) ENGINE=TinyLog; +INSERT INTO nullable_string_value VALUES ('test'); + +SELECT 'NullableColumnFromTable with transform_null_in=0'; +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT value FROM nullable_string_value) SETTINGS transform_null_in = 0; + +SELECT 'NullableColumnFromTable with transform_null_in=1'; +SELECT * FROM bloom_filter_nullable_index WHERE str IN + (SELECT value FROM nullable_string_value) SETTINGS 
transform_null_in = 1; + +DROP TABLE nullable_string_value; +DROP TABLE bloom_filter_nullable_index; diff --git a/tests/queries/0_stateless/01681_hyperscan_debug_assertion.reference b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.reference new file mode 100644 index 00000000000..7326d960397 --- /dev/null +++ b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.reference @@ -0,0 +1 @@ +Ok diff --git a/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sh b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sh new file mode 100755 index 00000000000..0bbf8942c1a --- /dev/null +++ b/tests/queries/0_stateless/01681_hyperscan_debug_assertion.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# We throw our own exception from operator new. +# In previous versions of Hyperscan it triggered debug assertion as it only expected std::bad_alloc. + +M=1000000 + +while true +do + $CLICKHOUSE_CLIENT --allow_hyperscan 1 --max_memory_usage $M --format Null --query " + SELECT [1, 2, 3, 11] = arraySort(multiMatchAllIndices('фабрикант', ['', 'рикан', 'а', 'f[a${RANDOM}e]b[ei]rl', 'ф[иа${RANDOM}эе]б[еэи][рпл]', 'афиукд', 'a[f${RANDOM}t],th', '^ф[аие${RANDOM}э]?б?[еэи]?$', 'бе${RANDOM}рлик', 'fa${RANDOM}b', 'фа[беьв]+е?[рл${RANDOM}ко]'])) + " 2>&1 | grep -q 'Memory limit' || break; + + M=$((M + 100000)) +done + +echo 'Ok' diff --git a/tests/queries/0_stateless/01682_gather_utils_ubsan.reference b/tests/queries/0_stateless/01682_gather_utils_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01682_gather_utils_ubsan.sql b/tests/queries/0_stateless/01682_gather_utils_ubsan.sql new file mode 100644 index 00000000000..2388586e8fe --- /dev/null +++ b/tests/queries/0_stateless/01682_gather_utils_ubsan.sql @@ -0,0 +1 @@ +SELECT arrayResize([1, 2, 3], -9223372036854775808); -- { serverError 128 } diff --git a/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.reference b/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.reference new file mode 100644 index 00000000000..be589c9ceb0 --- /dev/null +++ b/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.reference @@ -0,0 +1,4 @@ +1 +1 +2 +2 diff --git a/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.sql b/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.sql new file mode 100644 index 00000000000..eaf15ed9fd8 --- /dev/null +++ b/tests/queries/0_stateless/01683_dist_INSERT_block_structure_mismatch.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS tmp_01683; +DROP TABLE IF EXISTS dist_01683; + +SET prefer_localhost_replica=0; +-- To suppress "Structure does not match (remote: n Int8 Int8(size = 0), local: n UInt64 UInt64(size = 1)), implicit conversion will be done." 
+SET send_logs_level='error'; + +CREATE TABLE tmp_01683 (n Int8) ENGINE=Memory; +CREATE TABLE dist_01683 (n UInt64) Engine=Distributed(test_cluster_two_shards, currentDatabase(), tmp_01683, n); + +SET insert_distributed_sync=1; +INSERT INTO dist_01683 VALUES (1),(2); + +SET insert_distributed_sync=0; +INSERT INTO dist_01683 VALUES (1),(2); +SYSTEM FLUSH DISTRIBUTED dist_01683; + +-- TODO: cover distributed_directory_monitor_batch_inserts=1 + +SELECT * FROM tmp_01683 ORDER BY n; + +DROP TABLE tmp_01683; +DROP TABLE dist_01683; diff --git a/tests/queries/0_stateless/01683_intdiv_ubsan.reference b/tests/queries/0_stateless/01683_intdiv_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01683_intdiv_ubsan.sql b/tests/queries/0_stateless/01683_intdiv_ubsan.sql new file mode 100644 index 00000000000..adac2505745 --- /dev/null +++ b/tests/queries/0_stateless/01683_intdiv_ubsan.sql @@ -0,0 +1 @@ +SELECT DISTINCT intDiv(number, nan) FROM numbers(10); -- { serverError 153 } diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.reference b/tests/queries/0_stateless/01683_text_log_deadlock.reference new file mode 100644 index 00000000000..76de47c4a46 --- /dev/null +++ b/tests/queries/0_stateless/01683_text_log_deadlock.reference @@ -0,0 +1 @@ +queries 25000 diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.sh b/tests/queries/0_stateless/01683_text_log_deadlock.sh new file mode 100755 index 00000000000..ee772bffa27 --- /dev/null +++ b/tests/queries/0_stateless/01683_text_log_deadlock.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_BENCHMARK --secure -i 25000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries 25000' diff --git a/tests/queries/0_stateless/01684_geohash_ubsan.reference b/tests/queries/0_stateless/01684_geohash_ubsan.reference new file mode 100644 index 00000000000..2ae4be53dea --- /dev/null +++ b/tests/queries/0_stateless/01684_geohash_ubsan.reference @@ -0,0 +1 @@ +['ypzpgxczgpyr'] diff --git a/tests/queries/0_stateless/01684_geohash_ubsan.sql b/tests/queries/0_stateless/01684_geohash_ubsan.sql new file mode 100644 index 00000000000..e7eb9c526b4 --- /dev/null +++ b/tests/queries/0_stateless/01684_geohash_ubsan.sql @@ -0,0 +1 @@ +SELECT geohashesInBox(100.0000991821289, 100.0000991821289, 1000.0001220703125, 1000.0001220703125, 0); diff --git a/tests/queries/0_stateless/01684_insert_specify_shard_id.reference b/tests/queries/0_stateless/01684_insert_specify_shard_id.reference new file mode 100644 index 00000000000..1cd79d87135 --- /dev/null +++ b/tests/queries/0_stateless/01684_insert_specify_shard_id.reference @@ -0,0 +1,120 @@ +0 +0 +1 +1 +2 +2 +3 +3 +4 +4 +5 +5 +6 +6 +7 +7 +8 +8 +9 +9 +0 +0 +1 +1 +2 +2 +3 +3 +4 +4 +5 +5 +6 +6 +7 +7 +8 +8 +9 +9 +0 +0 +1 +1 +2 +2 +3 +3 +4 +4 +5 +5 +6 +6 +7 +7 +8 +8 +9 +9 +10 +10 +11 +11 +12 +12 +13 +13 +14 +14 +15 +15 +16 +16 +17 +17 +18 +18 +19 +19 +0 +0 +1 +1 +2 +2 +3 +3 +4 +4 +5 +5 +6 +6 +7 +7 +8 +8 +9 +9 +10 +10 +11 +11 +12 +12 +13 +13 +14 +14 +15 +15 +16 +16 +17 +17 +18 +18 +19 +19 diff --git a/tests/queries/0_stateless/01684_insert_specify_shard_id.sql b/tests/queries/0_stateless/01684_insert_specify_shard_id.sql new file mode 100644 index 00000000000..ce1c7807b59 --- /dev/null +++ b/tests/queries/0_stateless/01684_insert_specify_shard_id.sql @@ -0,0 +1,37 @@ +DROP TABLE IF EXISTS x; +DROP TABLE IF EXISTS x_dist; +DROP TABLE IF EXISTS y; 
+DROP TABLE IF EXISTS y_dist; + +CREATE TABLE x AS system.numbers ENGINE = MergeTree ORDER BY number; +CREATE TABLE y AS system.numbers ENGINE = MergeTree ORDER BY number; + +CREATE TABLE x_dist as x ENGINE = Distributed('test_cluster_two_shards', currentDatabase(), x); +CREATE TABLE y_dist as y ENGINE = Distributed('test_cluster_two_shards_localhost', currentDatabase(), y); + +-- insert into first shard +INSERT INTO x_dist SELECT * FROM numbers(10) settings insert_shard_id = 1; +INSERT INTO y_dist SELECT * FROM numbers(10) settings insert_shard_id = 1; + +SELECT * FROM x_dist ORDER by number; +SELECT * FROM y_dist ORDER by number; + +-- insert into second shard +INSERT INTO x_dist SELECT * FROM numbers(10, 10) settings insert_shard_id = 2; +INSERT INTO y_dist SELECT * FROM numbers(10, 10) settings insert_shard_id = 2; + +SELECT * FROM x_dist ORDER by number; +SELECT * FROM y_dist ORDER by number; + +-- no sharding key +INSERT INTO x_dist SELECT * FROM numbers(10); -- { serverError 55 } +INSERT INTO y_dist SELECT * FROM numbers(10); -- { serverError 55 } + +-- invalid shard id +INSERT INTO x_dist SELECT * FROM numbers(10) settings insert_shard_id = 3; -- { serverError 1003 } +INSERT INTO y_dist SELECT * FROM numbers(10) settings insert_shard_id = 3; -- { serverError 1003 } + +DROP TABLE x; +DROP TABLE x_dist; +DROP TABLE y; +DROP TABLE y_dist; diff --git a/tests/queries/0_stateless/01685_json_extract_double_as_float.reference b/tests/queries/0_stateless/01685_json_extract_double_as_float.reference new file mode 100644 index 00000000000..f3f4206b425 --- /dev/null +++ b/tests/queries/0_stateless/01685_json_extract_double_as_float.reference @@ -0,0 +1,10 @@ +1.1 1.1 1.1 1.1 +0.01 0.01 0.01 0.01 +0 +\N +-1e300 +-inf +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/01685_json_extract_double_as_float.sql b/tests/queries/0_stateless/01685_json_extract_double_as_float.sql new file mode 100644 index 00000000000..c9aa2518085 --- /dev/null +++ b/tests/queries/0_stateless/01685_json_extract_double_as_float.sql @@ -0,0 +1,24 @@ +WITH '{ "v":1.1}' AS raw +SELECT + JSONExtract(raw, 'v', 'float') AS float32_1, + JSONExtract(raw, 'v', 'Float32') AS float32_2, + JSONExtractFloat(raw, 'v') AS float64_1, + JSONExtract(raw, 'v', 'double') AS float64_2; + +WITH '{ "v":1E-2}' AS raw +SELECT + JSONExtract(raw, 'v', 'float') AS float32_1, + JSONExtract(raw, 'v', 'Float32') AS float32_2, + JSONExtractFloat(raw, 'v') AS float64_1, + JSONExtract(raw, 'v', 'double') AS float64_2; + +SELECT JSONExtract('{"v":1.1}', 'v', 'UInt64'); +SELECT JSONExtract('{"v":1.1}', 'v', 'Nullable(UInt64)'); + +SELECT JSONExtract('{"v":-1e300}', 'v', 'Float64'); +SELECT JSONExtract('{"v":-1e300}', 'v', 'Float32'); + +SELECT JSONExtract('{"v":-1e300}', 'v', 'UInt64'); +SELECT JSONExtract('{"v":-1e300}', 'v', 'Int64'); +SELECT JSONExtract('{"v":-1e300}', 'v', 'UInt8'); +SELECT JSONExtract('{"v":-1e300}', 'v', 'Int8'); diff --git a/tests/queries/0_stateless/01686_event_time_microseconds_part_log.reference b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.reference new file mode 100644 index 00000000000..9766475a418 --- /dev/null +++ b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.reference @@ -0,0 +1 @@ +ok diff --git a/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql new file mode 100644 index 00000000000..a1b419527d4 --- /dev/null +++ b/tests/queries/0_stateless/01686_event_time_microseconds_part_log.sql @@ -0,0 
+1,23 @@ +DROP TABLE IF EXISTS table_with_single_pk; + +CREATE TABLE table_with_single_pk +( + key UInt8, + value String +) +ENGINE = MergeTree +ORDER BY key; + +INSERT INTO table_with_single_pk SELECT number, toString(number % 10) FROM numbers(10000000); + +SYSTEM FLUSH LOGS; + +WITH ( + SELECT (event_time, event_time_microseconds) + FROM system.part_log + ORDER BY event_time DESC + LIMIT 1 + ) AS time +SELECT if(dateDiff('second', toDateTime(time.2), toDateTime(time.1)) = 0, 'ok', 'fail'); + +DROP TABLE IF EXISTS table_with_single_pk; diff --git a/tests/queries/0_stateless/01686_rocksdb.reference b/tests/queries/0_stateless/01686_rocksdb.reference new file mode 100644 index 00000000000..fa4e12d51ff --- /dev/null +++ b/tests/queries/0_stateless/01686_rocksdb.reference @@ -0,0 +1,15 @@ +123 Hello, world (123) +-- +-- +123 Hello, world (123) +4567 Hello, world (4567) +-- +-- +0 Hello, world (0) +-- +123 Hello, world (123) +456 Hello, world (456) +-- +99 Hello, world (99) +999 Hello, world (999) +9999 Hello, world (9999) diff --git a/tests/queries/0_stateless/01686_rocksdb.sql b/tests/queries/0_stateless/01686_rocksdb.sql new file mode 100644 index 00000000000..c9b133acff3 --- /dev/null +++ b/tests/queries/0_stateless/01686_rocksdb.sql @@ -0,0 +1,27 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test (key UInt64, value String) Engine=EmbeddedRocksDB PRIMARY KEY(key); + +INSERT INTO test SELECT number, format('Hello, world ({})', toString(number)) FROM numbers(10000); + +SELECT * FROM test WHERE key = 123; +SELECT '--'; +SELECT * FROM test WHERE key = -123; +SELECT '--'; +SELECT * FROM test WHERE key = 123 OR key = 4567 ORDER BY key; +SELECT '--'; +SELECT * FROM test WHERE key = NULL; +SELECT '--'; +SELECT * FROM test WHERE key = NULL OR key = 0; +SELECT '--'; +SELECT * FROM test WHERE key IN (123, 456, -123) ORDER BY key; +SELECT '--'; +SELECT * FROM test WHERE key = 'Hello'; -- { serverError 53 } + +DETACH TABLE test NO DELAY; +ATTACH TABLE test; + +SELECT * FROM test WHERE key IN (99, 999, 9999, -123) ORDER BY key; + +DROP TABLE IF EXISTS test; + diff --git a/tests/queries/0_stateless/01690_quantilesTiming_ubsan.reference b/tests/queries/0_stateless/01690_quantilesTiming_ubsan.reference new file mode 100644 index 00000000000..b3c946cbaec --- /dev/null +++ b/tests/queries/0_stateless/01690_quantilesTiming_ubsan.reference @@ -0,0 +1,2 @@ +[0] +[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] diff --git a/tests/queries/0_stateless/01690_quantilesTiming_ubsan.sql b/tests/queries/0_stateless/01690_quantilesTiming_ubsan.sql new file mode 100644 index 00000000000..b2a5ab61efc --- /dev/null +++ b/tests/queries/0_stateless/01690_quantilesTiming_ubsan.sql @@ -0,0 +1,31 @@ +-- NOTE: that due to overflows it may give different result before +-- quantilesTimingWeighted() had been converted to double: +-- +-- Before: +-- +-- SELECT quantilesTimingWeighted(1)(number, 9223372036854775807) +-- FROM numbers(2) +-- +-- ┌─quantilesTimingWeighted(1)(number, 9223372036854775807)─┐ +-- │ [1] │ +-- └─────────────────────────────────────────────────────────┘ +-- +-- After: +-- +-- SELECT quantilesTimingWeighted(1)(number, 9223372036854775807) +-- FROM numbers(2) +-- +-- ┌─quantilesTimingWeighted(1)(number, 9223372036854775807)─┐ +-- │ [0] │ +-- └─────────────────────────────────────────────────────────┘ + +SELECT quantilesTimingWeighted(0.1)(number, 9223372036854775807) FROM numbers(2); + +-- same UB, but in the inner loop +SELECT quantilesTimingWeighted(0, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99, 
0.999, 1)(number, 9223372036854775807) +FROM +( + SELECT number + FROM system.numbers + LIMIT 100 +); diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.reference b/tests/queries/0_stateless/01691_DateTime64_clamp.reference new file mode 100644 index 00000000000..3adc9a17e5c --- /dev/null +++ b/tests/queries/0_stateless/01691_DateTime64_clamp.reference @@ -0,0 +1,17 @@ +-- { echo } +SELECT toTimeZone(toDateTime(-2, 2), 'Europe/Moscow'); +1970-01-01 03:00:00.00 +SELECT toDateTime64(-2, 2, 'Europe/Moscow'); +1970-01-01 03:00:00.00 +SELECT CAST(-1 AS DateTime64(0, 'Europe/Moscow')); +1970-01-01 03:00:00 +SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Europe/Moscow')); +2020-01-01 00:00:00 +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Europe/Moscow') FORMAT Null; +SELECT toTimeZone(toDateTime(-2., 2), 'Europe/Moscow'); +1970-01-01 03:00:00.00 +SELECT toDateTime64(-2., 2, 'Europe/Moscow'); +1970-01-01 03:00:00.00 +SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow'); +2106-02-07 09:00:00.00 +SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow') FORMAT Null; diff --git a/tests/queries/0_stateless/01691_DateTime64_clamp.sql b/tests/queries/0_stateless/01691_DateTime64_clamp.sql new file mode 100644 index 00000000000..92d5a33328f --- /dev/null +++ b/tests/queries/0_stateless/01691_DateTime64_clamp.sql @@ -0,0 +1,10 @@ +-- { echo } +SELECT toTimeZone(toDateTime(-2, 2), 'Europe/Moscow'); +SELECT toDateTime64(-2, 2, 'Europe/Moscow'); +SELECT CAST(-1 AS DateTime64(0, 'Europe/Moscow')); +SELECT CAST('2020-01-01 00:00:00.3' AS DateTime64(0, 'Europe/Moscow')); +SELECT toDateTime64(bitShiftLeft(toUInt64(1), 33), 2, 'Europe/Moscow') FORMAT Null; +SELECT toTimeZone(toDateTime(-2., 2), 'Europe/Moscow'); +SELECT toDateTime64(-2., 2, 'Europe/Moscow'); +SELECT toDateTime64(toFloat32(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow'); +SELECT toDateTime64(toFloat64(bitShiftLeft(toUInt64(1),33)), 2, 'Europe/Moscow') FORMAT Null; diff --git a/tests/queries/0_stateless/01691_parser_data_type_exponential.reference b/tests/queries/0_stateless/01691_parser_data_type_exponential.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01691_parser_data_type_exponential.sh b/tests/queries/0_stateless/01691_parser_data_type_exponential.sh new file mode 100755 index 00000000000..2b1d34982a2 --- /dev/null +++ b/tests/queries/0_stateless/01691_parser_data_type_exponential.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# Check that DataType parser does not have exponential complexity in the case found by fuzzer. 
+for _ in {1..10}; do ${CLICKHOUSE_CLIENT} -n --testmode --query "SELECT CAST(1 AS A2222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222220000000000000000000000000000000000000000000000000000000000000000000000000000002260637443813394204 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpio22222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggre222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 22222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 2222222222222eFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222222222222222222222222200000000000000000000178859639454016722222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpio22222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 
222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmap, 00000000000000000000000000000000000000000000000000000000000000000000000000000001841416382, 222222222222222ggregateFuncpion(groupBitmap22222222222222222222222222222222222222222222222222222222222222222222222200000000000000000000178859639454016722222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmapp, 222222222222222ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateF222222222222222222222222222222222222222222222222222222222teFuncpion(groupBitmap, 222222222222223ggregateFuncpion(groupBitmap2222222222222222222222222222222222222222222222222222 222222222222222222ggregateFuncpion(groupBitmap, 22222222222222222222222222222222222222222222222222222222222222222222222222222222222222222, 222222222222222ggregateFuncpion(groupBitmap222222222222222222222222222222222222222222222222222222222222222222222222000000000000000000001788596394540167623 222222222222222222ggregateFu22222222222222222222222222 222222222, UInt33)); -- { clientError 62 }"; done diff --git a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference new file mode 100644 index 00000000000..a0562e40027 --- /dev/null +++ b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.reference @@ -0,0 +1,9 @@ +-- { echo } +select toDateTime64(toDateTime(1), 2); +1970-01-01 03:00:01.00 +select toDateTime64(toDate(1), 2); +1970-01-02 00:00:00.00 +select toDateTime64(toDateTime(1), 2, 'GMT'); +1970-01-01 00:00:01.00 +select toDateTime64(toDate(1), 2, 'GMT'); +1970-01-02 00:00:00.00 diff --git a/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql new file mode 100644 index 00000000000..60f76e9192c --- /dev/null +++ b/tests/queries/0_stateless/01692_DateTime64_from_DateTime.sql @@ -0,0 +1,5 @@ +-- { echo } +select toDateTime64(toDateTime(1), 2); +select toDateTime64(toDate(1), 2); +select toDateTime64(toDateTime(1), 2, 'GMT'); +select toDateTime64(toDate(1), 2, 'GMT'); diff --git a/tests/queries/0_stateless/01698_map_populate_overflow.reference b/tests/queries/0_stateless/01698_map_populate_overflow.reference new file mode 100644 index 00000000000..24e0038125a --- /dev/null +++ b/tests/queries/0_stateless/01698_map_populate_overflow.reference @@ -0,0 +1 @@ +([18446744073709551615],[0]) diff --git a/tests/queries/0_stateless/01698_map_populate_overflow.sql b/tests/queries/0_stateless/01698_map_populate_overflow.sql new file mode 100644 index 00000000000..90c47ff3949 --- /dev/null +++ b/tests/queries/0_stateless/01698_map_populate_overflow.sql @@ -0,0 +1,2 @@ +SELECT mapPopulateSeries([0xFFFFFFFFFFFFFFFF], [0], 0xFFFFFFFFFFFFFFFF); +SELECT mapPopulateSeries([toUInt64(1)], [1], 0xFFFFFFFFFFFFFFFF); -- { serverError 128 } diff --git a/tests/queries/0_stateless/01699_timezoneOffset.reference b/tests/queries/0_stateless/01699_timezoneOffset.reference new file mode 100644 index 00000000000..e70c5fa62ee --- /dev/null +++ 
b/tests/queries/0_stateless/01699_timezoneOffset.reference @@ -0,0 +1,183 @@ +DST boundary test for Europe/Moscow: +0 1981-04-01 22:40:00 10800 355002000 +1 1981-04-01 22:50:00 10800 355002600 +2 1981-04-02 00:00:00 14400 355003200 +3 1981-04-02 00:10:00 14400 355003800 +0 1981-09-30 23:00:00 14400 370724400 +1 1981-09-30 23:10:00 14400 370725000 +2 1981-09-30 23:20:00 14400 370725600 +3 1981-09-30 23:30:00 14400 370726200 +4 1981-09-30 23:40:00 14400 370726800 +5 1981-09-30 23:50:00 14400 370727400 +6 1981-09-30 23:00:00 10800 370728000 +7 1981-09-30 23:10:00 10800 370728600 +8 1981-09-30 23:20:00 10800 370729200 +9 1981-09-30 23:30:00 10800 370729800 +10 1981-09-30 23:40:00 10800 370730400 +11 1981-09-30 23:50:00 10800 370731000 +12 1981-10-01 00:00:00 10800 370731600 +13 1981-10-01 00:10:00 10800 370732200 +14 1981-10-01 00:20:00 10800 370732800 +15 1981-10-01 00:30:00 10800 370733400 +16 1981-10-01 00:40:00 10800 370734000 +17 1981-10-01 00:50:00 10800 370734600 +DST boundary test for Asia/Tehran: +0 2020-03-21 22:40:00 12600 1584817800 +1 2020-03-21 22:50:00 12600 1584818400 +2 2020-03-22 00:00:00 16200 1584819000 +3 2020-03-22 00:10:00 16200 1584819600 +0 2020-09-20 23:00:00 16200 1600626600 +1 2020-09-20 23:10:00 16200 1600627200 +2 2020-09-20 23:20:00 16200 1600627800 +3 2020-09-20 23:30:00 16200 1600628400 +4 2020-09-20 23:40:00 16200 1600629000 +5 2020-09-20 23:50:00 16200 1600629600 +6 2020-09-20 23:00:00 12600 1600630200 +7 2020-09-20 23:10:00 12600 1600630800 +8 2020-09-20 23:20:00 12600 1600631400 +9 2020-09-20 23:30:00 12600 1600632000 +10 2020-09-20 23:40:00 12600 1600632600 +11 2020-09-20 23:50:00 12600 1600633200 +12 2020-09-21 00:00:00 12600 1600633800 +13 2020-09-21 00:10:00 12600 1600634400 +14 2020-09-21 00:20:00 12600 1600635000 +15 2020-09-21 00:30:00 12600 1600635600 +16 2020-09-21 00:40:00 12600 1600636200 +17 2020-09-21 00:50:00 12600 1600636800 +DST boundary test for Australia/Lord_Howe. 
This is a special timezone with DST offset is 30mins with the timezone epoc also lays at half hour +37800 +39600 +DST boundary test for Australia/Lord_Howe: +0 2020-10-04 01:40:00 37800 1601737800 +1 2020-10-04 01:50:00 37800 1601738400 +2 2020-10-04 02:00:00 39600 1601739000 +3 2020-10-04 02:10:00 39600 1601739600 +0 2019-04-07 01:00:00 39600 1554559200 +1 2019-04-07 01:10:00 39600 1554559800 +2 2019-04-07 01:20:00 39600 1554560400 +3 2019-04-07 01:30:00 39600 1554561000 +4 2019-04-07 01:40:00 39600 1554561600 +5 2019-04-07 01:50:00 39600 1554562200 +6 2019-04-07 01:00:00 37800 1554562800 +7 2019-04-07 01:10:00 37800 1554563400 +8 2019-04-07 01:20:00 37800 1554564000 +9 2019-04-07 02:30:00 37800 1554564600 +10 2019-04-07 02:40:00 37800 1554565200 +11 2019-04-07 02:50:00 37800 1554565800 +12 2019-04-07 02:00:00 37800 1554566400 +13 2019-04-07 02:10:00 37800 1554567000 +14 2019-04-07 02:20:00 37800 1554567600 +15 2019-04-07 03:30:00 37800 1554568200 +16 2019-04-07 03:40:00 37800 1554568800 +17 2019-04-07 03:50:00 37800 1554569400 +4 days test in batch comparing with manually computation result for Europe/Moscow: +4 days test in batch comparing with manually computation result for Asia/Tehran: +The result maybe wrong for toDateTime processing Australia/Lord_Howe +1601739000 2020-10-04 02:00:00 39600 37800 +1601739600 2020-10-04 02:10:00 39600 37800 +1601740200 2020-10-04 02:20:00 39600 37800 +1601740800 2020-10-04 03:30:00 39600 41400 +1601741400 2020-10-04 03:40:00 39600 41400 +1601742000 2020-10-04 03:50:00 39600 41400 +1601742600 2020-10-04 03:00:00 39600 37800 +1601743200 2020-10-04 03:10:00 39600 37800 +1601743800 2020-10-04 03:20:00 39600 37800 +1601744400 2020-10-04 04:30:00 39600 41400 +1601745000 2020-10-04 04:40:00 39600 41400 +1601745600 2020-10-04 04:50:00 39600 41400 +1601746200 2020-10-04 04:00:00 39600 37800 +1601746800 2020-10-04 04:10:00 39600 37800 +1601747400 2020-10-04 04:20:00 39600 37800 +1601748000 2020-10-04 05:30:00 39600 41400 +1554562800 2019-04-07 01:00:00 37800 36000 +1554563400 2019-04-07 01:10:00 37800 36000 +1554564000 2019-04-07 01:20:00 37800 36000 +1554564600 2019-04-07 02:30:00 37800 39600 +1554565200 2019-04-07 02:40:00 37800 39600 +1554565800 2019-04-07 02:50:00 37800 39600 +1554566400 2019-04-07 02:00:00 37800 36000 +1554567000 2019-04-07 02:10:00 37800 36000 +1554567600 2019-04-07 02:20:00 37800 36000 +1554568200 2019-04-07 03:30:00 37800 39600 +1554568800 2019-04-07 03:40:00 37800 39600 +1554569400 2019-04-07 03:50:00 37800 39600 +Moscow DST Years: +11 1981-06-01 00:00:00 14400 +12 1982-06-01 00:00:00 14400 +13 1983-06-01 00:00:00 14400 +14 1984-06-01 00:00:00 14400 +15 1985-06-01 00:00:00 14400 +16 1986-06-01 00:00:00 14400 +17 1987-06-01 00:00:00 14400 +18 1988-06-01 00:00:00 14400 +19 1989-06-01 00:00:00 14400 +20 1990-06-01 00:00:00 14400 +22 1992-06-01 00:00:00 14400 +23 1993-06-01 00:00:00 14400 +24 1994-06-01 00:00:00 14400 +25 1995-06-01 00:00:00 14400 +26 1996-06-01 00:00:00 14400 +27 1997-06-01 00:00:00 14400 +28 1998-06-01 00:00:00 14400 +29 1999-06-01 00:00:00 14400 +30 2000-06-01 00:00:00 14400 +31 2001-06-01 00:00:00 14400 +32 2002-06-01 00:00:00 14400 +33 2003-06-01 00:00:00 14400 +34 2004-06-01 00:00:00 14400 +35 2005-06-01 00:00:00 14400 +36 2006-06-01 00:00:00 14400 +37 2007-06-01 00:00:00 14400 +38 2008-06-01 00:00:00 14400 +39 2009-06-01 00:00:00 14400 +40 2010-06-01 00:00:00 14400 +41 2011-06-01 00:00:00 14400 +42 2012-06-01 00:00:00 14400 +43 2013-06-01 00:00:00 14400 +44 2014-06-01 00:00:00 14400 +Moscow DST Years with perment 
DST from 2011-2014: +2011-01-01 00:00:00 2011-03-27 00:00:00 86 2011_10800 +2011-03-28 00:00:00 2011-12-31 00:00:00 279 2011_14400 +2012-01-01 00:00:00 2012-12-31 00:00:00 366 2012_14400 +2013-01-01 00:00:00 2013-12-31 00:00:00 365 2013_14400 +2014-01-01 00:00:00 2014-10-26 00:00:00 299 2014_14400 +2014-10-27 00:00:00 2014-12-31 00:00:00 66 2014_10800 +Tehran DST Years: +8 1978-06-01 00:00:00 18000 +9 1979-06-01 00:00:00 16200 +10 1980-06-01 00:00:00 16200 +21 1991-06-01 00:00:00 16200 +22 1992-06-01 00:00:00 16200 +23 1993-06-01 00:00:00 16200 +24 1994-06-01 00:00:00 16200 +25 1995-06-01 00:00:00 16200 +26 1996-06-01 00:00:00 16200 +27 1997-06-01 00:00:00 16200 +28 1998-06-01 00:00:00 16200 +29 1999-06-01 00:00:00 16200 +30 2000-06-01 00:00:00 16200 +31 2001-06-01 00:00:00 16200 +32 2002-06-01 00:00:00 16200 +33 2003-06-01 00:00:00 16200 +34 2004-06-01 00:00:00 16200 +35 2005-06-01 00:00:00 16200 +38 2008-06-01 00:00:00 16200 +39 2009-06-01 00:00:00 16200 +40 2010-06-01 00:00:00 16200 +41 2011-06-01 00:00:00 16200 +42 2012-06-01 00:00:00 16200 +43 2013-06-01 00:00:00 16200 +44 2014-06-01 00:00:00 16200 +45 2015-06-01 00:00:00 16200 +46 2016-06-01 00:00:00 16200 +47 2017-06-01 00:00:00 16200 +48 2018-06-01 00:00:00 16200 +49 2019-06-01 00:00:00 16200 +50 2020-06-01 00:00:00 16200 +Shanghai DST Years: +16 1986-08-01 00:00:00 32400 +17 1987-08-01 00:00:00 32400 +18 1988-08-01 00:00:00 32400 +19 1989-08-01 00:00:00 32400 +20 1990-08-01 00:00:00 32400 +21 1991-08-01 00:00:00 32400 diff --git a/tests/queries/0_stateless/01699_timezoneOffset.sql b/tests/queries/0_stateless/01699_timezoneOffset.sql new file mode 100644 index 00000000000..1b3f05ecdd7 --- /dev/null +++ b/tests/queries/0_stateless/01699_timezoneOffset.sql @@ -0,0 +1,46 @@ + +/* Test the DST(daylight saving time) offset changing boundary*/ +SELECT 'DST boundary test for Europe/Moscow:'; +SELECT number,(toDateTime('1981-04-01 22:40:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4); +SELECT number,(toDateTime('1981-09-30 23:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(18); + +SELECT 'DST boundary test for Asia/Tehran:'; +SELECT number,(toDateTime('2020-03-21 22:40:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4); +SELECT number,(toDateTime('2020-09-20 23:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(18); + +SELECT 'DST boundary test for Australia/Lord_Howe. This is a special timezone with DST offset is 30mins with the timezone epoc also lays at half hour'; +SELECT timezoneOffset(toDateTime('2018-08-21 22:20:00', 'Australia/Lord_Howe')); +SELECT timezoneOffset(toDateTime('2018-02-21 22:20:00', 'Australia/Lord_Howe')); + +SELECT 'DST boundary test for Australia/Lord_Howe:'; +SELECT number,(toDateTime('2020-10-04 01:40:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(4); +SELECT number,(toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS k, timezoneOffset(k) AS t, toUnixTimestamp(k) as s FROM numbers(18); + + +/* The Batch Part. 
The test period is 4 whole days. */
+SELECT '4 days test in batch comparing with manually computation result for Europe/Moscow:';
+SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-04-01 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+SELECT toUnixTimestamp(x) as tt, (toDateTime('1981-09-30 00:00:00', 'Europe/Moscow') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+
+SELECT '4 days test in batch comparing with manually computation result for Asia/Tehran:';
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-03-21 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-09-20 00:00:00', 'Asia/Tehran') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(576) where res != calc;
+
+/* During this test we got an unexpected result from the toDateTime() function when processing the special time zone 'Australia/Lord_Howe', which may indicate a bug. */
+SELECT 'The result maybe wrong for toDateTime processing Australia/Lord_Howe';
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2020-10-04 01:40:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(18) where res != calc;
+SELECT toUnixTimestamp(x) as tt, (toDateTime('2019-04-07 01:00:00', 'Australia/Lord_Howe') + INTERVAL number * 600 SECOND) AS x, timezoneOffset(x) as res,(toDateTime(toString(x), 'UTC') - x ) AS calc FROM numbers(18) where res != calc;
+
+
+/* Find all the years that observed DST during the given period. */
+
+SELECT 'Moscow DST Years:';
+SELECT number, (toDateTime('1970-06-01 00:00:00', 'Europe/Moscow') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 10800;
+SELECT 'Moscow DST Years with perment DST from 2011-2014:';
+SELECT min((toDateTime('2011-01-01 00:00:00', 'Europe/Moscow') + INTERVAL number DAY) as day) as start, max(day) as end, count(1), concat(toString(toYear(day)),'_',toString(timezoneOffset(day)))as DST from numbers(365*4+1) group by DST order by start;
+
+SELECT 'Tehran DST Years:';
+SELECT number, (toDateTime('1970-06-01 00:00:00', 'Asia/Tehran') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 12600;
+SELECT 'Shanghai DST Years:';
+SELECT number, (toDateTime('1970-08-01 00:00:00', 'Asia/Shanghai') + INTERVAL number YEAR) AS DST_Y, timezoneOffset(DST_Y) AS t FROM numbers(51) where t != 28800;
+
diff --git a/tests/queries/0_stateless/01700_deltasum.reference b/tests/queries/0_stateless/01700_deltasum.reference
new file mode 100644
index 00000000000..be5b176c627
--- /dev/null
+++ b/tests/queries/0_stateless/01700_deltasum.reference
@@ -0,0 +1,9 @@
+2
+6
+7
+7
+7
+5
+2
+2.25
+6.5
diff --git a/tests/queries/0_stateless/01700_deltasum.sql b/tests/queries/0_stateless/01700_deltasum.sql
new file mode 100644
index 00000000000..93edb2e477d
--- /dev/null
+++ b/tests/queries/0_stateless/01700_deltasum.sql
@@ -0,0 +1,9 @@
+select deltaSum(arrayJoin([1, 2, 3]));
+select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4]));
+select deltaSum(arrayJoin([1, 2, 3, 0, 3, 4, 2, 3]));
+select deltaSum(arrayJoin([1, 2, 3, 0, 3,
3, 3, 3, 3, 4, 2, 3])); +select deltaSum(arrayJoin([1, 2, 3, 0, 0, 0, 0, 3, 3, 3, 3, 3, 4, 2, 3])); +select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([0, 1])) as rows union all select deltaSumState(arrayJoin([4, 5])) as rows); +select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([4, 5])) as rows union all select deltaSumState(arrayJoin([0, 1])) as rows); +select deltaSum(arrayJoin([2.25, 3, 4.5])); +select deltaSumMerge(rows) from (select deltaSumState(arrayJoin([0.1, 0.3, 0.5])) as rows union all select deltaSumState(arrayJoin([4.1, 5.1, 6.6])) as rows); diff --git a/tests/queries/0_stateless/01700_mod_negative_type_promotion.reference b/tests/queries/0_stateless/01700_mod_negative_type_promotion.reference new file mode 100644 index 00000000000..b3a93b2203a --- /dev/null +++ b/tests/queries/0_stateless/01700_mod_negative_type_promotion.reference @@ -0,0 +1,9 @@ +-199 Int16 +-199 Int32 +97 Int64 +17 Int64 +-199 Int32 +9 UInt8 +199 UInt16 +-199 Float64 +-199 Float64 diff --git a/tests/queries/0_stateless/01700_mod_negative_type_promotion.sql b/tests/queries/0_stateless/01700_mod_negative_type_promotion.sql new file mode 100644 index 00000000000..93bb7071659 --- /dev/null +++ b/tests/queries/0_stateless/01700_mod_negative_type_promotion.sql @@ -0,0 +1,12 @@ +SELECT toInt32(-199) % 200 as k, toTypeName(k); +SELECT toInt32(-199) % toUInt16(200) as k, toTypeName(k); +SELECT toInt32(-199) % toUInt32(200) as k, toTypeName(k); +SELECT toInt32(-199) % toUInt64(200) as k, toTypeName(k); + +SELECT toInt32(-199) % toInt16(-200) as k, toTypeName(k); + +SELECT 199 % -10 as k, toTypeName(k); +SELECT 199 % -200 as k, toTypeName(k); + +SELECT toFloat64(-199) % 200 as k, toTypeName(k); +SELECT -199 % toFloat64(200) as k, toTypeName(k); diff --git a/tests/queries/0_stateless/01700_point_in_polygon_ubsan.reference b/tests/queries/0_stateless/01700_point_in_polygon_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql b/tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql new file mode 100644 index 00000000000..97db40ab65e --- /dev/null +++ b/tests/queries/0_stateless/01700_point_in_polygon_ubsan.sql @@ -0,0 +1 @@ +SELECT pointInPolygon((0, 0), [[(0, 0), (10, 10), (256, -9223372036854775808)]]) FORMAT Null; diff --git a/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference b/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference new file mode 100644 index 00000000000..2fc177c812e --- /dev/null +++ b/tests/queries/0_stateless/01700_system_zookeeper_path_in.reference @@ -0,0 +1,16 @@ +block_numbers +blocks +1 +======== +block_numbers +blocks +1 +======== +block_numbers +blocks +======== +1 +failed_parts +last_part +leader_election-0000000000 +parallel diff --git a/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql b/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql new file mode 100644 index 00000000000..d4126098c7c --- /dev/null +++ b/tests/queries/0_stateless/01700_system_zookeeper_path_in.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS sample_table; + +CREATE TABLE sample_table ( + key UInt64 +) +ENGINE ReplicatedMergeTree('/clickhouse/01700_system_zookeeper_path_in', '1') +ORDER BY tuple(); + +SELECT name FROM system.zookeeper WHERE path = '/clickhouse/01700_system_zookeeper_path_in' AND name like 'block%' ORDER BY name; +SELECT name FROM system.zookeeper WHERE path = '/clickhouse/01700_system_zookeeper_path_in/replicas' ORDER BY name; +SELECT 
'========'; +SELECT name FROM system.zookeeper WHERE path IN ('/clickhouse/01700_system_zookeeper_path_in') AND name LIKE 'block%' ORDER BY name; +SELECT name FROM system.zookeeper WHERE path IN ('/clickhouse/01700_system_zookeeper_path_in/replicas') ORDER BY name; +SELECT '========'; +SELECT name FROM system.zookeeper WHERE path IN ('/clickhouse/01700_system_zookeeper_path_in','/clickhouse/01700_system_zookeeper_path_in/replicas') AND name LIKE 'block%' ORDER BY name; +SELECT '========'; +SELECT name FROM system.zookeeper WHERE path IN (SELECT concat('/clickhouse/01700_system_zookeeper_path_in/', name) FROM system.zookeeper WHERE (path = '/clickhouse/01700_system_zookeeper_path_in')) ORDER BY name; + +DROP TABLE IF EXISTS sample_table; diff --git a/tests/queries/0_stateless/01701_if_tuple_segfault.reference b/tests/queries/0_stateless/01701_if_tuple_segfault.reference new file mode 100644 index 00000000000..001e50da954 --- /dev/null +++ b/tests/queries/0_stateless/01701_if_tuple_segfault.reference @@ -0,0 +1,3 @@ +2020-10-01 19:20:30 hello ([0],45) 45 ([0,1,2,3,4,5,6,7,8,9,10,11,12],[45,55,65,75,85,95,105,115,125,135,145,155,165]) +([3],4) +2020-10-01 19:20:30 hello ([0],45) 5 ([0,1,2,3,4,5,6,7,8,9,10,11,12],[22,27,32,37,42,47,52,57,62,67,72,77,82]) diff --git a/tests/queries/0_stateless/01701_if_tuple_segfault.sql b/tests/queries/0_stateless/01701_if_tuple_segfault.sql new file mode 100644 index 00000000000..93b28c578a9 --- /dev/null +++ b/tests/queries/0_stateless/01701_if_tuple_segfault.sql @@ -0,0 +1,33 @@ +DROP TABLE IF EXISTS agg_table; + +CREATE TABLE IF NOT EXISTS agg_table +( + time DateTime CODEC(DoubleDelta, LZ4), + xxx String, + two_values Tuple(Array(UInt16), UInt32), + agg_simple SimpleAggregateFunction(sum, UInt64), + agg SimpleAggregateFunction(sumMap, Tuple(Array(Int16), Array(UInt64))) +) +ENGINE = AggregatingMergeTree() +ORDER BY (xxx, time); + +INSERT INTO agg_table SELECT toDateTime('2020-10-01 19:20:30'), 'hello', ([any(number)], sum(number)), sum(number), + sumMap((arrayMap(i -> toString(i), range(13)), arrayMap(i -> (number + i), range(13)))) FROM numbers(10); + +SELECT * FROM agg_table; + +SELECT if(xxx = 'x', ([2], 3), ([3], 4)) FROM agg_table; + +SELECT if(xxx = 'x', ([2], 3), ([3], 4, 'q', 'w', 7)) FROM agg_table; --{ serverError 386 } + +ALTER TABLE agg_table UPDATE two_values = (two_values.1, two_values.2) WHERE time BETWEEN toDateTime('2020-08-01 00:00:00') AND toDateTime('2020-12-01 00:00:00') SETTINGS mutations_sync = 2; + +ALTER TABLE agg_table UPDATE agg_simple = 5 WHERE time BETWEEN toDateTime('2020-08-01 00:00:00') AND toDateTime('2020-12-01 00:00:00') SETTINGS mutations_sync = 2; + +ALTER TABLE agg_table UPDATE agg = (agg.1, agg.2) WHERE time BETWEEN toDateTime('2020-08-01 00:00:00') AND toDateTime('2020-12-01 00:00:00') SETTINGS mutations_sync = 2; + +ALTER TABLE agg_table UPDATE agg = (agg.1, arrayMap(x -> toUInt64(x / 2), agg.2)) WHERE time BETWEEN toDateTime('2020-08-01 00:00:00') AND toDateTime('2020-12-01 00:00:00') SETTINGS mutations_sync = 2; + +SELECT * FROM agg_table; + +DROP TABLE IF EXISTS agg_table; diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.reference b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.reference new file mode 100644 index 00000000000..587579af915 --- /dev/null +++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.reference @@ -0,0 +1 @@ +Ok. 
diff --git a/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh new file mode 100755 index 00000000000..d3e634eb560 --- /dev/null +++ b/tests/queries/0_stateless/01701_parallel_parsing_infinite_segmentation.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -q "create table insert_big_json(a String, b String) engine=MergeTree() order by tuple()"; + +python3 -c "[print('{{\"a\":\"{}\", \"b\":\"{}\"'.format('clickhouse'* 1000000, 'dbms' * 1000000)) for i in range(10)]; [print('{{\"a\":\"{}\", \"b\":\"{}\"}}'.format('clickhouse'* 100000, 'dbms' * 100000)) for i in range(10)]" 2>/dev/null | ${CLICKHOUSE_CLIENT} --input_format_parallel_parsing=1 --max_memory_usage=0 -q "insert into insert_big_json FORMAT JSONEachRow" 2>&1 | grep -q "min_chunk_bytes_for_parallel_parsing" && echo "Ok." || echo "FAIL" ||: \ No newline at end of file diff --git a/tests/queries/0_stateless/01702_bitmap_native_integers.reference b/tests/queries/0_stateless/01702_bitmap_native_integers.reference new file mode 100644 index 00000000000..5be3912b8d5 --- /dev/null +++ b/tests/queries/0_stateless/01702_bitmap_native_integers.reference @@ -0,0 +1 @@ +251 65531 4294967291 18446744073709551611 255 65535 4294967295 18446744073709551615 diff --git a/tests/queries/0_stateless/01702_bitmap_native_integers.sql b/tests/queries/0_stateless/01702_bitmap_native_integers.sql new file mode 100644 index 00000000000..a31de25dc30 --- /dev/null +++ b/tests/queries/0_stateless/01702_bitmap_native_integers.sql @@ -0,0 +1,5 @@ +drop table if exists t; +create table t(i8 Int8, i16 Int16, i32 Int32, i64 Int64) engine Memory; +insert into t values (-1, -1, -1, -1), (-2, -2, -2, -2), (-3, -3, -3, -3), (-4, -4, -4, -4), (-5, -5, -5, -5); +select * apply bitmapMin, * apply bitmapMax from (select * apply groupBitmapState from t); +drop table t; diff --git a/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.reference b/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.reference new file mode 100644 index 00000000000..2bdcedba90e --- /dev/null +++ b/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.reference @@ -0,0 +1,23 @@ +SELECT avg(number + 2) FROM numbers(10) +value: 6.5 +EXPLAIN syntax: +SELECT avg(number) + 2 +FROM numbers(10) + +SELECT avg(number - 2) FROM numbers(10) +value: 2.5 +EXPLAIN syntax: +SELECT avg(number) - 2 +FROM numbers(10) + +SELECT avg(number * 2) FROM numbers(10) +value: 9 +EXPLAIN syntax: +SELECT avg(number) * 2 +FROM numbers(10) + +SELECT avg(number / 2) FROM numbers(10) +value: 2.25 +EXPLAIN syntax: +SELECT avg(number) / 2 +FROM numbers(10) diff --git a/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.sql b/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.sql new file mode 100644 index 00000000000..8fa4dd0ae47 --- /dev/null +++ b/tests/queries/0_stateless/01702_rewrite_avg_for_algebraic_optimization.sql @@ -0,0 +1,22 @@ +SELECT 'SELECT avg(number + 2) FROM numbers(10)'; +SELECT 'value: ', avg(number + 2) FROM numbers(10); +SELECT 'EXPLAIN syntax:'; +EXPLAIN SYNTAX SELECT avg(number + 2) FROM numbers(10); + +SELECT ''; +SELECT 'SELECT avg(number - 2) FROM numbers(10)'; +SELECT 'value: ', avg(number - 2) FROM numbers(10); +SELECT 'EXPLAIN syntax:'; +EXPLAIN SYNTAX SELECT avg(number - 
2) FROM numbers(10); + +SELECT ''; +SELECT 'SELECT avg(number * 2) FROM numbers(10)'; +SELECT 'value: ', avg(number * 2) FROM numbers(10); +SELECT 'EXPLAIN syntax:'; +EXPLAIN SYNTAX SELECT avg(number * 2) FROM numbers(10); + +SELECT ''; +SELECT 'SELECT avg(number / 2) FROM numbers(10)'; +SELECT 'value: ', avg(number / 2) FROM numbers(10); +SELECT 'EXPLAIN syntax:'; +EXPLAIN SYNTAX SELECT avg(number / 2) FROM numbers(10); diff --git a/tests/queries/0_stateless/01702_system_numbers_scientific_notation.reference b/tests/queries/0_stateless/01702_system_numbers_scientific_notation.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01702_system_numbers_scientific_notation.sql b/tests/queries/0_stateless/01702_system_numbers_scientific_notation.sql new file mode 100644 index 00000000000..6e037ee4a2e --- /dev/null +++ b/tests/queries/0_stateless/01702_system_numbers_scientific_notation.sql @@ -0,0 +1,5 @@ +select * from numbers(1e2) format Null; +select * from numbers_mt(1e2) format Null; +select * from numbers_mt('100') format Null; -- { serverError 43 } +select * from numbers_mt(inf) format Null; -- { serverError 43 } +select * from numbers_mt(nan) format Null; -- { serverError 43 } diff --git a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference new file mode 100644 index 00000000000..228086615da --- /dev/null +++ b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.reference @@ -0,0 +1,9 @@ +-- { echo } +SELECT toString(toDateTime('-922337203.6854775808', 1)); +2106-02-07 15:41:33.6 +SELECT toString(toDateTime('9922337203.6854775808', 1)); +2104-12-30 00:50:11.6 +SELECT toDateTime64(CAST('10000000000.1' AS Decimal64(1)), 1); +2106-02-07 20:50:08.1 +SELECT toDateTime64(CAST('-10000000000.1' AS Decimal64(1)), 1); +2011-12-23 00:38:08.1 diff --git a/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql new file mode 100644 index 00000000000..d1f0416149a --- /dev/null +++ b/tests/queries/0_stateless/01702_toDateTime_from_string_clamping.sql @@ -0,0 +1,5 @@ +-- { echo } +SELECT toString(toDateTime('-922337203.6854775808', 1)); +SELECT toString(toDateTime('9922337203.6854775808', 1)); +SELECT toDateTime64(CAST('10000000000.1' AS Decimal64(1)), 1); +SELECT toDateTime64(CAST('-10000000000.1' AS Decimal64(1)), 1); diff --git a/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.reference b/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.reference new file mode 100644 index 00000000000..e92a057f8c3 --- /dev/null +++ b/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.reference @@ -0,0 +1,6 @@ +22.5 +SELECT sum(number) / 2 +FROM numbers(10) +22.5 +SELECT sum(number) / 2 +FROM numbers(10) diff --git a/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.sql b/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.sql new file mode 100644 index 00000000000..1c5271b4717 --- /dev/null +++ b/tests/queries/0_stateless/01703_rewrite_aggregate_function_case_insensitive.sql @@ -0,0 +1,6 @@ +SELECT sum(number / 2) FROM numbers(10); +EXPLAIN SYNTAX SELECT sum(number / 2) FROM numbers(10); + + +SELECT Sum(number / 2) FROM numbers(10); +EXPLAIN SYNTAX SELECT Sum(number / 2) FROM numbers(10); diff --git 
a/tests/queries/0_stateless/01704_transform_with_float_key.reference b/tests/queries/0_stateless/01704_transform_with_float_key.reference new file mode 100644 index 00000000000..761e15c903c --- /dev/null +++ b/tests/queries/0_stateless/01704_transform_with_float_key.reference @@ -0,0 +1,30 @@ +- +Hello +- +World +- +- +- +- +- +- +- +- +Hello +- +World +- +- +- +- +- +- +- +Hello +- +World +- +- +- +- +- diff --git a/tests/queries/0_stateless/01704_transform_with_float_key.sql b/tests/queries/0_stateless/01704_transform_with_float_key.sql new file mode 100644 index 00000000000..690c73ee28a --- /dev/null +++ b/tests/queries/0_stateless/01704_transform_with_float_key.sql @@ -0,0 +1,3 @@ +SELECT transform(number / 2, [0.5, 1.5], ['Hello', 'World'], '-') FROM numbers(10); +SELECT transform(number / 2, [1.0, 2.0], ['Hello', 'World'], '-') FROM numbers(10); +SELECT transform(number / 2, [1, 2], ['Hello', 'World'], '-') FROM numbers(10); diff --git a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference new file mode 100644 index 00000000000..5b0f7bdeb2d --- /dev/null +++ b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.reference @@ -0,0 +1,66 @@ +SELECT + CAST(1, 'INT'), + ceil(1), + ceil(1), + char(49), + CHAR_LENGTH('1'), + CHARACTER_LENGTH('1'), + coalesce(1), + concat('1', '1'), + corr(1, 1), + cos(1), + count(), + covarPop(1, 1), + covarSamp(1, 1), + DATABASE(), + dateDiff('DAY', toDate('2020-10-24'), toDate('2019-10-24')), + exp(1), + arrayFlatten([[1]]), + floor(1), + FQDN(), + greatest(1), + 1, + ifNull(1, 1), + lower('A'), + least(1), + length('1'), + log(1), + position('1', '1'), + log(1), + log10(1), + log2(1), + lower('A'), + max(1), + substring('123', 1, 1), + min(1), + 1 % 1, + NOT 1, + now(), + now64(), + nullIf(1, 1), + pi(), + position('123', '2'), + pow(1, 1), + pow(1, 1), + rand(), + replaceAll('1', '1', '2'), + reverse('123'), + round(1), + sin(1), + sqrt(1), + stddevPop(1), + stddevSamp(1), + substring('123', 2), + substring('123', 2), + count(), + tan(1), + tanh(1), + trunc(1), + trunc(1), + upper('A'), + upper('A'), + currentUser(), + varPop(1), + varSamp(1), + toWeek(toDate('2020-10-24')), + toYearWeek(toDate('2020-10-24')) diff --git a/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql new file mode 100644 index 00000000000..9b35087182c --- /dev/null +++ b/tests/queries/0_stateless/01705_normalize_case_insensitive_function_names.sql @@ -0,0 +1 @@ +EXPLAIN SYNTAX SELECT CAST(1 AS INT), CEIL(1), CEILING(1), CHAR(49), CHAR_LENGTH('1'), CHARACTER_LENGTH('1'), COALESCE(1), CONCAT('1', '1'), CORR(1, 1), COS(1), COUNT(1), COVAR_POP(1, 1), COVAR_SAMP(1, 1), DATABASE(), DATEDIFF('DAY', toDate('2020-10-24'), toDate('2019-10-24')), EXP(1), FLATTEN([[1]]), FLOOR(1), FQDN(), GREATEST(1), IF(1, 1, 1), IFNULL(1, 1), LCASE('A'), LEAST(1), LENGTH('1'), LN(1), LOCATE('1', '1'), LOG(1), LOG10(1), LOG2(1), LOWER('A'), MAX(1), MID('123', 1, 1), MIN(1), MOD(1, 1), NOT(1), NOW(), NOW64(), NULLIF(1, 1), PI(), POSITION('123', '2'), POW(1, 1), POWER(1, 1), RAND(), REPLACE('1', '1', '2'), REVERSE('123'), ROUND(1), SIN(1), SQRT(1), STDDEV_POP(1), STDDEV_SAMP(1), SUBSTR('123', 2), SUBSTRING('123', 2), SUM(1), TAN(1), TANH(1), TRUNC(1), TRUNCATE(1), UCASE('A'), UPPER('A'), USER(), VAR_POP(1), VAR_SAMP(1), WEEK(toDate('2020-10-24')), 
YEARWEEK(toDate('2020-10-24')) format TSVRaw; diff --git a/tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference new file mode 100644 index 00000000000..0343ad84abb --- /dev/null +++ b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.reference @@ -0,0 +1,6 @@ +SELECT + count(), + count(), + count(), + count(), + count(NULL) diff --git a/tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql new file mode 100644 index 00000000000..d20f23feef8 --- /dev/null +++ b/tests/queries/0_stateless/01706_optimize_normalize_count_variants.sql @@ -0,0 +1,4 @@ + +set optimize_normalize_count_variants = 1; + +explain syntax select count(), count(1), count(-1), sum(1), count(null); diff --git a/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.reference b/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.sql b/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.sql new file mode 100644 index 00000000000..fad890c4807 --- /dev/null +++ b/tests/queries/0_stateless/01709_inactive_parts_to_delay_throw.sql @@ -0,0 +1,12 @@ +drop table if exists x; + +create table x (i int) engine MergeTree order by i settings old_parts_lifetime = 10000000000, min_bytes_for_wide_part = 0, inactive_parts_to_throw_insert = 1; + +insert into x values (1); +insert into x values (2); + +optimize table x final; + +insert into x values (3); -- { serverError 252; } + +drop table if exists x; diff --git a/tests/queries/0_stateless/01710_join_use_nulls.reference b/tests/queries/0_stateless/01710_join_use_nulls.reference new file mode 100644 index 00000000000..8bd111e0416 --- /dev/null +++ b/tests/queries/0_stateless/01710_join_use_nulls.reference @@ -0,0 +1,3 @@ +3 +1 +1 diff --git a/tests/queries/0_stateless/01710_join_use_nulls.sql b/tests/queries/0_stateless/01710_join_use_nulls.sql new file mode 100644 index 00000000000..b024227d4e2 --- /dev/null +++ b/tests/queries/0_stateless/01710_join_use_nulls.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS X; +DROP TABLE IF EXISTS Y; + +CREATE TABLE X (id Int) ENGINE=Memory; +CREATE TABLE Y (id Int) ENGINE=Memory; + +-- Type mismatch of columns to JOIN by: plus(id, 1) Int64 at left, Y.id Int32 at right. +SELECT Y.id - 1 FROM X RIGHT JOIN Y ON (X.id + 1) = Y.id SETTINGS join_use_nulls=1; -- { serverError 53 } +SELECT Y.id - 1 FROM X RIGHT JOIN Y ON (X.id + 1) = toInt64(Y.id) SETTINGS join_use_nulls=1; + +-- Logical error: 'Arguments of 'plus' have incorrect data types: '2' of type 'UInt8', '1' of type 'UInt8''. 
+-- Because 1 became toNullable(1), i.e.: +-- 2 UInt8 Const(size = 1, UInt8(size = 1)) +-- 1 UInt8 Const(size = 1, Nullable(size = 1, UInt8(size = 1), UInt8(size = 1))) +SELECT 2+1 FROM system.one X RIGHT JOIN system.one Y ON X.dummy+1 = Y.dummy SETTINGS join_use_nulls = 1; -- { serverError 53 } +SELECT 2+1 FROM system.one X RIGHT JOIN system.one Y ON X.dummy+1 = toUInt16(Y.dummy) SETTINGS join_use_nulls = 1; +SELECT X.dummy+1 FROM system.one X RIGHT JOIN system.one Y ON X.dummy = Y.dummy SETTINGS join_use_nulls = 1; +SELECT Y.dummy+1 FROM system.one X RIGHT JOIN system.one Y ON X.dummy = Y.dummy SETTINGS join_use_nulls = 1; + +DROP TABLE X; +DROP TABLE Y; diff --git a/tests/queries/0_stateless/01711_cte_subquery_fix.reference b/tests/queries/0_stateless/01711_cte_subquery_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01711_cte_subquery_fix.sql b/tests/queries/0_stateless/01711_cte_subquery_fix.sql new file mode 100644 index 00000000000..ddea548eada --- /dev/null +++ b/tests/queries/0_stateless/01711_cte_subquery_fix.sql @@ -0,0 +1,3 @@ +drop table if exists t; +create table t engine = Memory as with cte as (select * from numbers(10)) select * from cte; +drop table t; diff --git a/tests/queries/0_stateless/01711_decimal_multiplication.reference b/tests/queries/0_stateless/01711_decimal_multiplication.reference new file mode 100644 index 00000000000..37869329ca4 --- /dev/null +++ b/tests/queries/0_stateless/01711_decimal_multiplication.reference @@ -0,0 +1,4 @@ +2.0000 +2.0000 +2.0000 +2.0000 diff --git a/tests/queries/0_stateless/01711_decimal_multiplication.sql b/tests/queries/0_stateless/01711_decimal_multiplication.sql new file mode 100644 index 00000000000..10d23599b4d --- /dev/null +++ b/tests/queries/0_stateless/01711_decimal_multiplication.sql @@ -0,0 +1,4 @@ +SELECT materialize(toDecimal64(4,4)) - materialize(toDecimal32(2,2)); +SELECT toDecimal64(4,4) - materialize(toDecimal32(2,2)); +SELECT materialize(toDecimal64(4,4)) - toDecimal32(2,2); +SELECT toDecimal64(4,4) - toDecimal32(2,2); diff --git a/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.reference b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.reference new file mode 100644 index 00000000000..51acb066394 --- /dev/null +++ b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.reference @@ -0,0 +1,6 @@ +1 1 +2 2 +1 1 +2 2 +1 1 +2 2 diff --git a/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql new file mode 100644 index 00000000000..0acf6992c1e --- /dev/null +++ b/tests/queries/0_stateless/01712_no_adaptive_granularity_vertical_merge.sql @@ -0,0 +1,30 @@ +DROP TABLE IF EXISTS old_school_table; + +CREATE TABLE old_school_table +( + key UInt64, + value String +) +ENGINE = MergeTree() +ORDER BY key +SETTINGS index_granularity_bytes = 0, enable_mixed_granularity_parts = 0, min_bytes_for_wide_part = 0, +vertical_merge_algorithm_min_rows_to_activate = 1, vertical_merge_algorithm_min_columns_to_activate = 1; + +INSERT INTO old_school_table VALUES (1, '1'); +INSERT INTO old_school_table VALUES (2, '2'); + +OPTIMIZE TABLE old_school_table FINAL; + +SELECT * FROM old_school_table ORDER BY key; + +OPTIMIZE TABLE old_school_table FINAL; -- just to be sure + +SELECT * FROM old_school_table ORDER BY key; + +ALTER TABLE old_school_table MODIFY SETTING vertical_merge_algorithm_min_rows_to_activate = 10000, 
vertical_merge_algorithm_min_columns_to_activate = 10000; + +OPTIMIZE TABLE old_school_table FINAL; -- and horizontal merge + +SELECT * FROM old_school_table ORDER BY key; + +DROP TABLE IF EXISTS old_school_table; diff --git a/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.reference b/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.sql b/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.sql new file mode 100644 index 00000000000..7d4c83c9d3a --- /dev/null +++ b/tests/queries/0_stateless/01713_table_ttl_old_syntax_zookeeper.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS ttl_table; + +CREATE TABLE ttl_table +( + date Date, + value UInt64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01713_table_ttl', '1', date, date, 8192) +TTL date + INTERVAL 2 MONTH; --{ serverError 36 } + +CREATE TABLE ttl_table +( + date Date, + value UInt64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01713_table_ttl', '1', date, date, 8192) +PARTITION BY date; --{ serverError 42 } + +CREATE TABLE ttl_table +( + date Date, + value UInt64 +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_01713_table_ttl', '1', date, date, 8192) +ORDER BY value; --{ serverError 42 } + +SELECT 1; + +DROP TABLE IF EXISTS ttl_table; diff --git a/tests/queries/0_stateless/01714_alter_drop_version.reference b/tests/queries/0_stateless/01714_alter_drop_version.reference new file mode 100644 index 00000000000..72749c905a3 --- /dev/null +++ b/tests/queries/0_stateless/01714_alter_drop_version.reference @@ -0,0 +1 @@ +1 1 1 diff --git a/tests/queries/0_stateless/01714_alter_drop_version.sql b/tests/queries/0_stateless/01714_alter_drop_version.sql new file mode 100644 index 00000000000..e3d5db33859 --- /dev/null +++ b/tests/queries/0_stateless/01714_alter_drop_version.sql @@ -0,0 +1,23 @@ +DROP TABLE IF EXISTS alter_drop_version; + +CREATE TABLE alter_drop_version +( + `key` UInt64, + `value` String, + `ver` Int8 +) +ENGINE = ReplacingMergeTree(ver) +ORDER BY key; + +INSERT INTO alter_drop_version VALUES (1, '1', 1); + +ALTER TABLE alter_drop_version DROP COLUMN ver; --{serverError 524} +ALTER TABLE alter_drop_version RENAME COLUMN ver TO rev; --{serverError 524} + +DETACH TABLE alter_drop_version; + +ATTACH TABLE alter_drop_version; + +SELECT * FROM alter_drop_version; + +DROP TABLE IF EXISTS alter_drop_version; diff --git a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.reference b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql new file mode 100644 index 00000000000..66b53369517 --- /dev/null +++ b/tests/queries/0_stateless/01715_background_checker_blather_zookeeper.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS i20203_1; +DROP TABLE IF EXISTS i20203_2; + +CREATE TABLE i20203_1 (a Int8) +ENGINE = ReplicatedMergeTree('/clickhouse/01715_background_checker_i20203', 'r1') +ORDER BY tuple(); + +CREATE TABLE i20203_2 (a Int8) +ENGINE = 
ReplicatedMergeTree('/clickhouse/01715_background_checker_i20203', 'r2') +ORDER BY tuple(); + +DETACH TABLE i20203_2; +INSERT INTO i20203_1 VALUES (2); + +DETACH TABLE i20203_1; +ATTACH TABLE i20203_2; + +-- sleep 10 seconds +SELECT number from numbers(10) where sleepEachRow(1) Format Null; + +SELECT num_tries < 50 +FROM system.replication_queue +WHERE table = 'i20203_2' AND database = currentDatabase(); + +ATTACH TABLE i20203_1; + +DROP TABLE IF EXISTS i20203_1; +DROP TABLE IF EXISTS i20203_2; diff --git a/tests/queries/0_stateless/01715_table_function_view_fix.reference b/tests/queries/0_stateless/01715_table_function_view_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01715_table_function_view_fix.sql b/tests/queries/0_stateless/01715_table_function_view_fix.sql new file mode 100644 index 00000000000..de5150b7b70 --- /dev/null +++ b/tests/queries/0_stateless/01715_table_function_view_fix.sql @@ -0,0 +1 @@ +SELECT view(SELECT 1); -- { clientError 62 } diff --git a/tests/queries/0_stateless/01716_array_difference_overflow.reference b/tests/queries/0_stateless/01716_array_difference_overflow.reference new file mode 100644 index 00000000000..5297534679e --- /dev/null +++ b/tests/queries/0_stateless/01716_array_difference_overflow.reference @@ -0,0 +1 @@ +[0,9223372036854710272] diff --git a/tests/queries/0_stateless/01716_array_difference_overflow.sql b/tests/queries/0_stateless/01716_array_difference_overflow.sql new file mode 100644 index 00000000000..3d153725294 --- /dev/null +++ b/tests/queries/0_stateless/01716_array_difference_overflow.sql @@ -0,0 +1,2 @@ +-- Overflow is Ok and behaves as the CPU does it. +SELECT arrayDifference([65536, -9223372036854775808]); diff --git a/tests/queries/0_stateless/01716_decimal_comparison_ubsan.reference b/tests/queries/0_stateless/01716_decimal_comparison_ubsan.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/01716_decimal_comparison_ubsan.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/01716_decimal_comparison_ubsan.sql b/tests/queries/0_stateless/01716_decimal_comparison_ubsan.sql new file mode 100644 index 00000000000..f68d9de1995 --- /dev/null +++ b/tests/queries/0_stateless/01716_decimal_comparison_ubsan.sql @@ -0,0 +1,2 @@ +SET decimal_check_overflow = 0; +SELECT toDecimal64(0, 8) = 9223372036854775807; diff --git a/tests/queries/0_stateless/01716_drop_rename_sign_column.reference b/tests/queries/0_stateless/01716_drop_rename_sign_column.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01716_drop_rename_sign_column.sql b/tests/queries/0_stateless/01716_drop_rename_sign_column.sql new file mode 100644 index 00000000000..c9119ee2b46 --- /dev/null +++ b/tests/queries/0_stateless/01716_drop_rename_sign_column.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS signed_table; + +CREATE TABLE signed_table ( + k UInt32, + v String, + s Int8 +) ENGINE CollapsingMergeTree(s) ORDER BY k; + +INSERT INTO signed_table(k, v, s) VALUES (1, 'a', 1); + +ALTER TABLE signed_table DROP COLUMN s; --{serverError 524} +ALTER TABLE signed_table RENAME COLUMN s TO s1; --{serverError 524} + +DROP TABLE IF EXISTS signed_table; diff --git a/tests/queries/0_stateless/01717_global_with_subquery_fix.reference b/tests/queries/0_stateless/01717_global_with_subquery_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01717_global_with_subquery_fix.sql 
b/tests/queries/0_stateless/01717_global_with_subquery_fix.sql new file mode 100644 index 00000000000..14c4ac3e4ca --- /dev/null +++ b/tests/queries/0_stateless/01717_global_with_subquery_fix.sql @@ -0,0 +1 @@ +WITH (SELECT count(distinct colU) from tabA) AS withA, (SELECT count(distinct colU) from tabA) AS withB SELECT withA / withB AS ratio FROM (SELECT date AS period, colX FROM (SELECT date, if(colA IN (SELECT colB FROM tabC), 0, colA) AS colX FROM tabB) AS tempB GROUP BY period, colX) AS main; -- {serverError 60} diff --git a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.reference b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql new file mode 100644 index 00000000000..c4f26a079f0 --- /dev/null +++ b/tests/queries/0_stateless/01717_int_div_float_too_large_ubsan.sql @@ -0,0 +1,2 @@ +SELECT intDiv(9223372036854775807, 0.9998999834060669); -- { serverError 153 } +SELECT intDiv(9223372036854775807, 1.); -- { serverError 153 } diff --git a/tests/queries/0_stateless/01718_subtract_seconds_date.reference b/tests/queries/0_stateless/01718_subtract_seconds_date.reference new file mode 100644 index 00000000000..97e3da8cc48 --- /dev/null +++ b/tests/queries/0_stateless/01718_subtract_seconds_date.reference @@ -0,0 +1,2 @@ +2021-02-14 23:59:59 +10 diff --git a/tests/queries/0_stateless/01718_subtract_seconds_date.sql b/tests/queries/0_stateless/01718_subtract_seconds_date.sql new file mode 100644 index 00000000000..6bffcd4db5a --- /dev/null +++ b/tests/queries/0_stateless/01718_subtract_seconds_date.sql @@ -0,0 +1,2 @@ +SELECT subtractSeconds(toDate('2021-02-15'), 1); +SELECT subtractSeconds(today(), 1) - subtractSeconds(today(), 11); diff --git a/tests/queries/0_stateless/01719_join_timezone.reference b/tests/queries/0_stateless/01719_join_timezone.reference new file mode 100644 index 00000000000..c2702a38012 --- /dev/null +++ b/tests/queries/0_stateless/01719_join_timezone.reference @@ -0,0 +1,3 @@ +2020-05-13 13:38:45 2020-05-13 16:38:45 +2020-05-13 13:38:45 2020-05-13 16:38:45 +2020-05-13 13:38:45 2020-05-13 16:38:45 diff --git a/tests/queries/0_stateless/01719_join_timezone.sql b/tests/queries/0_stateless/01719_join_timezone.sql new file mode 100644 index 00000000000..cbf0c27fcfc --- /dev/null +++ b/tests/queries/0_stateless/01719_join_timezone.sql @@ -0,0 +1,45 @@ +DROP TABLE IF EXISTS test; + +CREATE TABLE test (timestamp DateTime('UTC'), i UInt8) Engine=MergeTree() PARTITION BY toYYYYMM(timestamp) ORDER BY (i); +INSERT INTO test values ('2020-05-13 16:38:45', 1); + +SELECT + toTimeZone(timestamp, 'America/Sao_Paulo') AS converted, + timestamp AS original +FROM test +LEFT JOIN (SELECT 2 AS x) AS anything ON x = i +WHERE timestamp >= toDateTime('2020-05-13T00:00:00', 'America/Sao_Paulo'); + +/* This was incorrect result in previous ClickHouse versions: +┌─converted───────────┬─original────────────┐ +│ 2020-05-13 16:38:45 │ 2020-05-13 16:38:45 │ <-- toTimeZone is ignored. +└─────────────────────┴─────────────────────┘ +*/ + +SELECT + toTimeZone(timestamp, 'America/Sao_Paulo') AS converted, + timestamp AS original +FROM test +-- LEFT JOIN (SELECT 2 AS x) AS anything ON x = i -- Removing the join fixes the issue. 
+WHERE timestamp >= toDateTime('2020-05-13T00:00:00', 'America/Sao_Paulo'); + +/* +┌─converted───────────┬─original────────────┐ +│ 2020-05-13 13:38:45 │ 2020-05-13 16:38:45 │ <-- toTimeZone works. +└─────────────────────┴─────────────────────┘ +*/ + +SELECT + toTimeZone(timestamp, 'America/Sao_Paulo') AS converted, + timestamp AS original +FROM test +LEFT JOIN (SELECT 2 AS x) AS anything ON x = i +WHERE timestamp >= '2020-05-13T00:00:00'; -- Not using toDateTime in the WHERE also fixes the issue. + +/* +┌─converted───────────┬─original────────────┐ +│ 2020-05-13 13:38:45 │ 2020-05-13 16:38:45 │ <-- toTimeZone works. +└─────────────────────┴─────────────────────┘ +*/ + +DROP TABLE test; diff --git a/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.reference b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.reference new file mode 100644 index 00000000000..38abe3c9f52 --- /dev/null +++ b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.reference @@ -0,0 +1 @@ +1 First diff --git a/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.sql b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.sql new file mode 100644 index 00000000000..a0a4fbbfab9 --- /dev/null +++ b/tests/queries/0_stateless/01720_dictionary_create_source_with_functions.sql @@ -0,0 +1,28 @@ +DROP DATABASE IF EXISTS 01720_dictionary_db; +CREATE DATABASE 01720_dictionary_db; + +CREATE TABLE 01720_dictionary_db.dictionary_source_table +( + key UInt8, + value String +) +ENGINE = TinyLog; + +INSERT INTO 01720_dictionary_db.dictionary_source_table VALUES (1, 'First'); + +CREATE DICTIONARY 01720_dictionary_db.dictionary +( + key UInt64, + value String +) +PRIMARY KEY key +SOURCE(CLICKHOUSE(DB '01720_dictionary_db' TABLE 'dictionary_source_table' HOST hostName() PORT tcpPort())) +LIFETIME(0) +LAYOUT(FLAT()); + +SELECT * FROM 01720_dictionary_db.dictionary; + +DROP DICTIONARY 01720_dictionary_db.dictionary; +DROP TABLE 01720_dictionary_db.dictionary_source_table; + +DROP DATABASE 01720_dictionary_db; diff --git a/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.reference b/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql b/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql new file mode 100644 index 00000000000..d665dbc722f --- /dev/null +++ b/tests/queries/0_stateless/01720_engine_file_empty_if_not_exists.sql @@ -0,0 +1,16 @@ +DROP TABLE IF EXISTS file_engine_table; + +CREATE TABLE file_engine_table (id UInt32) ENGINE=File(TSV); + +SELECT * FROM file_engine_table; --{ serverError 107 } + +SET engine_file_empty_if_not_exists=0; + +SELECT * FROM file_engine_table; --{ serverError 107 } + +SET engine_file_empty_if_not_exists=1; + +SELECT * FROM file_engine_table; + +SET engine_file_empty_if_not_exists=0; +DROP TABLE file_engine_table; diff --git a/tests/queries/0_stateless/01720_union_distinct_with_limit.reference b/tests/queries/0_stateless/01720_union_distinct_with_limit.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01720_union_distinct_with_limit.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01720_union_distinct_with_limit.sql b/tests/queries/0_stateless/01720_union_distinct_with_limit.sql new file mode 100644 index 00000000000..9fc5b3eafd2 --- /dev/null +++ 
b/tests/queries/0_stateless/01720_union_distinct_with_limit.sql @@ -0,0 +1,8 @@ +SELECT x +FROM +( + SELECT 1 AS x + UNION DISTINCT + SELECT 1 +) +LIMIT 1; diff --git a/tests/queries/0_stateless/01721_dictionary_decimal_p_s.reference b/tests/queries/0_stateless/01721_dictionary_decimal_p_s.reference new file mode 100644 index 00000000000..066b4bd1d97 --- /dev/null +++ b/tests/queries/0_stateless/01721_dictionary_decimal_p_s.reference @@ -0,0 +1,10 @@ +-------- 42 -------- +42 14.0000 14.00000000 14.00000000 14.0000000000000000618637523926765281280 +42 14.0000 14.00000000 14.00000000 +14.0000 14.00000000 14.00000000 +-------- 4999 -------- +4999 1666.3333 1666.33333333 1666.33333333 1633.3553612205046244471093725648757194800 +4999 1666.3333 1666.33333333 1666.33333333 +1666.3333 1666.33333333 1666.33333333 +-------- 5000 -------- +0.1100 0.11000000 0.11000000 diff --git a/tests/queries/0_stateless/01721_dictionary_decimal_p_s.sql b/tests/queries/0_stateless/01721_dictionary_decimal_p_s.sql new file mode 100644 index 00000000000..0451d455009 --- /dev/null +++ b/tests/queries/0_stateless/01721_dictionary_decimal_p_s.sql @@ -0,0 +1,78 @@ +set allow_experimental_bigint_types=1; +drop database if exists db_01721; +drop table if exists db_01721.table_decimal_dict; +drop dictionary if exists db_01721.decimal_dict; + + +create database db_01721; + +CREATE TABLE db_01721.table_decimal_dict( +KeyField UInt64, +Decimal32_ Decimal(5,4), +Decimal64_ Decimal(18,8), +Decimal128_ Decimal(25,8), +Decimal256_ Decimal(76,37) +) +ENGINE = Memory; + +insert into db_01721.table_decimal_dict +select number, + number / 3, + number / 3, + number / 3, + number / 3 +from numbers(5000); + + +CREATE DICTIONARY IF NOT EXISTS db_01721.decimal_dict ( + KeyField UInt64 DEFAULT 9999999, + Decimal32_ Decimal(5,4) DEFAULT 0.11, + Decimal64_ Decimal(18,8) DEFAULT 0.11, + Decimal128_ Decimal(25,8) DEFAULT 0.11 +-- ,Decimal256_ Decimal256(37) DEFAULT 0.11 +) +PRIMARY KEY KeyField +SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'table_decimal_dict' DB 'db_01721')) +LIFETIME(0) LAYOUT(SPARSE_HASHED); + +select '-------- 42 --------'; + +SELECT * from db_01721.table_decimal_dict where KeyField = 42; + +SELECT * from db_01721.decimal_dict where KeyField = 42; + +SELECT dictGet('db_01721.decimal_dict', 'Decimal32_', toUInt64(42)), + dictGet('db_01721.decimal_dict', 'Decimal64_', toUInt64(42)), + dictGet('db_01721.decimal_dict', 'Decimal128_', toUInt64(42)) + -- ,dictGet('db_01721.decimal_dict', 'Decimal256_', toUInt64(42)) +; + + +select '-------- 4999 --------'; + +SELECT * from db_01721.table_decimal_dict where KeyField = 4999; + +SELECT * from db_01721.decimal_dict where KeyField = 4999; + +SELECT dictGet('db_01721.decimal_dict', 'Decimal32_', toUInt64(4999)), + dictGet('db_01721.decimal_dict', 'Decimal64_', toUInt64(4999)), + dictGet('db_01721.decimal_dict', 'Decimal128_', toUInt64(4999)) + --,dictGet('db_01721.decimal_dict', 'Decimal256_', toUInt64(4999)) +; + +select '-------- 5000 --------'; + +SELECT * from db_01721.table_decimal_dict where KeyField = 5000; + +SELECT * from db_01721.decimal_dict where KeyField = 5000; + +SELECT dictGet('db_01721.decimal_dict', 'Decimal32_', toUInt64(5000)), + dictGet('db_01721.decimal_dict', 'Decimal64_', toUInt64(5000)), + dictGet('db_01721.decimal_dict', 'Decimal128_', toUInt64(5000)) + --,dictGet('db_01721.decimal_dict', 'Decimal256_', toUInt64(5000)) +; + +drop table if exists table_decimal_dict; +drop dictionary if exists cache_dict; +drop database if exists 
db_01721; + diff --git a/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference new file mode 100644 index 00000000000..578661c9194 --- /dev/null +++ b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.reference @@ -0,0 +1,13 @@ +1 +2 +3 +4 +1 +2 +3 +4 +5 +6 +0 +1 +2 diff --git a/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql new file mode 100644 index 00000000000..079b2546a20 --- /dev/null +++ b/tests/queries/0_stateless/01721_engine_file_truncate_on_insert.sql @@ -0,0 +1,21 @@ +DROP TABLE IF EXISTS test; + +INSERT INTO TABLE FUNCTION file('01721_file/test/data.TSV', 'TSV', 'id UInt32') VALUES (1); +ATTACH TABLE test FROM '01721_file/test' (id UInt8) ENGINE=File(TSV); + +INSERT INTO test VALUES (2), (3); +INSERT INTO test VALUES (4); +SELECT * FROM test; + +SET engine_file_truncate_on_insert=0; + +INSERT INTO test VALUES (5), (6); +SELECT * FROM test; + +SET engine_file_truncate_on_insert=1; + +INSERT INTO test VALUES (0), (1), (2); +SELECT * FROM test; + +SET engine_file_truncate_on_insert=0; +DROP TABLE test; diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.reference b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.reference new file mode 100644 index 00000000000..02ae8a37e52 --- /dev/null +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.reference @@ -0,0 +1,20 @@ +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql new file mode 100644 index 00000000000..e43b81dca48 --- /dev/null +++ b/tests/queries/0_stateless/01730_distributed_group_by_no_merge_order_by_long.sql @@ -0,0 +1,20 @@ +drop table if exists data_01730; + +-- does not use 127.1 due to prefer_localhost_replica + +select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 20 settings distributed_group_by_no_merge=0, max_memory_usage='100Mi'; -- { serverError 241 } +-- no memory limit error, because with distributed_group_by_no_merge=2 remote servers will do ORDER BY and will cut to the LIMIT +select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 20 settings distributed_group_by_no_merge=2, max_memory_usage='100Mi'; + +-- since the MergingSortedTransform will start processing only when all ports (remotes) will have some data, +-- and the query with GROUP BY on remote servers will first do GROUP BY and then send the block, +-- so the initiator will first receive all blocks from remotes and only after start merging, +-- and will hit the memory limit. +select * from remote('127.{2..11}', view(select * from numbers(1e6))) group by number order by number limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='100Mi'; -- { serverError 241 } + +-- with optimize_aggregation_in_order=1 remote servers will produce blocks more frequently, +-- since they don't need to wait until the aggregation will be finished, +-- and so the query will not hit the memory limit error. 
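+-- Note: optimize_aggregation_in_order can only kick in when the GROUP BY key is a prefix of the table's sorting key,
+-- which is why the query below reads from a MergeTree table ordered by `key` instead of the numbers() view used above.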
+create table data_01730 engine=MergeTree() order by key as select number key from numbers(1e6); +select * from remote('127.{2..11}', currentDatabase(), data_01730) group by key order by key limit 1e6 settings distributed_group_by_no_merge=2, max_memory_usage='100Mi', optimize_aggregation_in_order=1 format Null; +drop table data_01730; diff --git a/tests/queries/0_stateless/01731_async_task_queue_wait.reference b/tests/queries/0_stateless/01731_async_task_queue_wait.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01731_async_task_queue_wait.sh b/tests/queries/0_stateless/01731_async_task_queue_wait.sh new file mode 100755 index 00000000000..e0babf3c6ff --- /dev/null +++ b/tests/queries/0_stateless/01731_async_task_queue_wait.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# regression for 'Empty task was returned from async task queue' during query +# cancellation with async_socket_for_remote=1 (that ignores +# max_distributed_connections) +timeout --signal=SIGINT 1 ${CLICKHOUSE_CLIENT} --max_distributed_connections=1 --max_block_size=2 --interactive_delay=900000 -q "select number + sleep(0.3) as x from remote('127.{2,3}', system.numbers) settings max_block_size = 2" 2>&1 | grep "Empty task was returned from async task queue" || true diff --git a/tests/queries/0_stateless/01732_bigint_ubsan.reference b/tests/queries/0_stateless/01732_bigint_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01732_bigint_ubsan.sql b/tests/queries/0_stateless/01732_bigint_ubsan.sql new file mode 100644 index 00000000000..238a5d99d30 --- /dev/null +++ b/tests/queries/0_stateless/01732_bigint_ubsan.sql @@ -0,0 +1,11 @@ +CREATE TEMPORARY TABLE decimal +( + f dec(38, 38) +); + +INSERT INTO decimal VALUES (0); +INSERT INTO decimal VALUES (0.42); +INSERT INTO decimal VALUES (-0.42); + +SELECT f + 1048575, f - 21, f - 84, f * 21, f * -21, f / 21, f / 84 FROM decimal WHERE f > 0; -- { serverError 407 } +SELECT f + -2, f - 21, f - 84, f * 21, f * -21, f / 9223372036854775807, f / 84 FROM decimal WHERE f > 0; -- { serverError 407 } diff --git a/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.reference b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.reference new file mode 100644 index 00000000000..4f3181ecce0 --- /dev/null +++ b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.reference @@ -0,0 +1,8 @@ +1 2005-03-18 01:58:31.222 +2 2005-03-18 01:58:31.222 +3 2005-03-18 01:58:31.222 +4 2005-03-18 01:58:31.222 +2005-03-18 04:58:31.222 +2005-03-18 04:58:31.222 +2005-03-18 04:58:31.222 +0 diff --git a/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql new file mode 100644 index 00000000000..dcd874f8c45 --- /dev/null +++ b/tests/queries/0_stateless/01732_more_consistent_datetime64_parsing.sql @@ -0,0 +1,11 @@ +CREATE TEMPORARY TABLE t (i UInt8, x DateTime64(3, 'UTC')); +INSERT INTO t VALUES (1, 1111111111222); +INSERT INTO t VALUES (2, 1111111111.222); +INSERT INTO t VALUES (3, '1111111111222'); +INSERT INTO t VALUES (4, '1111111111.222'); +SELECT * FROM t ORDER BY i; + +SELECT toDateTime64(1111111111.222, 3); +SELECT toDateTime64('1111111111.222', 3); +SELECT toDateTime64('1111111111222', 3); +SELECT ignore(toDateTime64(1111111111222, 3)); -- 
This gives somewhat correct but unexpected result diff --git a/tests/queries/0_stateless/01733_transform_ubsan.reference b/tests/queries/0_stateless/01733_transform_ubsan.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01733_transform_ubsan.sql b/tests/queries/0_stateless/01733_transform_ubsan.sql new file mode 100644 index 00000000000..256603e9087 --- /dev/null +++ b/tests/queries/0_stateless/01733_transform_ubsan.sql @@ -0,0 +1,4 @@ +SELECT arrayStringConcat(arrayMap(x -> transform(x, [1025, -9223372036854775808, 65537, 257, 1048576, 10, 7, 1048575, 65536], ['yandex', 'googlegooglegooglegoogle', 'test', '', '', 'hello', 'world', '', 'xyz'], ''), arrayMap(x -> (x % -inf), range(number))), '') +FROM system.numbers +LIMIT 1025 +FORMAT Null; diff --git a/tests/queries/0_stateless/01734_datetime64_from_float.reference b/tests/queries/0_stateless/01734_datetime64_from_float.reference new file mode 100644 index 00000000000..32e7d2736c6 --- /dev/null +++ b/tests/queries/0_stateless/01734_datetime64_from_float.reference @@ -0,0 +1,7 @@ +-- { echo } +SELECT CAST(1111111111.222 AS DateTime64(3)); +2005-03-18 04:58:31.222 +SELECT toDateTime(1111111111.222, 3); +2005-03-18 04:58:31.222 +SELECT toDateTime64(1111111111.222, 3); +2005-03-18 04:58:31.222 diff --git a/tests/queries/0_stateless/01734_datetime64_from_float.sql b/tests/queries/0_stateless/01734_datetime64_from_float.sql new file mode 100644 index 00000000000..b6be65cb7c2 --- /dev/null +++ b/tests/queries/0_stateless/01734_datetime64_from_float.sql @@ -0,0 +1,4 @@ +-- { echo } +SELECT CAST(1111111111.222 AS DateTime64(3)); +SELECT toDateTime(1111111111.222, 3); +SELECT toDateTime64(1111111111.222, 3); diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index 871d429e037..4e523545938 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -189,6 +189,7 @@ 01650_fetch_patition_with_macro_in_zk_path 01651_bugs_from_15889 01655_agg_if_nullable +01658_read_file_to_stringcolumn 01182_materialized_view_different_structure 01660_sum_ubsan 01669_columns_declaration_serde @@ -196,4 +197,13 @@ 01181_db_atomic_drop_on_cluster 01658_test_base64Encode_mysql_compatibility 01659_test_base64Decode_mysql_compatibility +01674_htm_xml_coarse_parse +01675_data_type_coroutine +01676_clickhouse_client_autocomplete 01671_aggregate_function_group_bitmap_data +01674_executable_dictionary_implicit_key +01686_rocksdb +01683_dist_INSERT_block_structure_mismatch +01702_bitmap_native_integers +01686_event_time_microseconds_part_log +01017_uniqCombined_memory_usage diff --git a/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py b/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py new file mode 100755 index 00000000000..3ed42f1c820 --- /dev/null +++ b/tests/queries/0_stateless/helpers/protobuf_length_delimited_encoder.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 + +# The protobuf compiler protoc doesn't support encoding or decoding length-delimited protobuf message. +# To do that this script has been written. 
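+# A length-delimited stream is a sequence of messages, each prefixed with its size in bytes
+# encoded as a base-128 varint (7 bits per byte, least significant group first, high bit set on every byte except the last).
+# For example, a 3-byte message is framed as b'\x03' + payload, while a 300-byte message gets the
+# two-byte prefix b'\xac\x02' (binary 100101100 split into 7-bit groups from the low end: 0101100 -> 0xAC, 10 -> 0x02).
+# read_varint/write_varint below implement this prefix, and protoc is invoked once per framed message with --decode/--encode.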
+ +import argparse +import os.path +import struct +import subprocess +import sys +import tempfile + +def read_varint(input): + res = 0 + shift = 0 + while True: + c = input.read(1) + if len(c) == 0: + return None + b = c[0] + if b < 0x80: + res += b << shift + break + b -= 0x80 + res += b << shift + shift = shift << 7 + return res + +def write_varint(output, value): + while True: + if value < 0x80: + b = value + output.write(b.to_bytes(1, byteorder='little')) + break + b = (value & 0x7F) + 0x80 + output.write(b.to_bytes(1, byteorder='little')) + value = value >> 7 + +def write_hexdump(output, data): + with subprocess.Popen(["hexdump", "-C"], stdin=subprocess.PIPE, stdout=output, shell=False) as proc: + proc.communicate(data) + if proc.returncode != 0: + raise RuntimeError("hexdump returned code " + str(proc.returncode)) + output.flush() + +class FormatSchemaSplitted: + def __init__(self, format_schema): + self.format_schema = format_schema + splitted = self.format_schema.split(':') + if len(splitted) < 2: + raise RuntimeError('The format schema must have the format "schemafile:MessageType"') + path = splitted[0] + self.schemadir = os.path.dirname(path) + self.schemaname = os.path.basename(path) + if not self.schemaname.endswith(".proto"): + self.schemaname = self.schemaname + ".proto" + self.message_type = splitted[1] + +def decode(input, output, format_schema): + if not type(format_schema) is FormatSchemaSplitted: + format_schema = FormatSchemaSplitted(format_schema) + msgindex = 1 + while True: + sz = read_varint(input) + if sz is None: + break + output.write("MESSAGE #{msgindex} AT 0x{msgoffset:08X}\n".format(msgindex=msgindex, msgoffset=input.tell()).encode()) + output.flush() + msg = input.read(sz) + if len(msg) < sz: + raise EOFError('Unexpected end of file') + with subprocess.Popen(["protoc", + "--decode", format_schema.message_type, format_schema.schemaname], + cwd=format_schema.schemadir, + stdin=subprocess.PIPE, + stdout=output, + shell=False) as proc: + proc.communicate(msg) + if proc.returncode != 0: + raise RuntimeError("protoc returned code " + str(proc.returncode)) + output.flush() + msgindex = msgindex + 1 + +def encode(input, output, format_schema): + if not type(format_schema) is FormatSchemaSplitted: + format_schema = FormatSchemaSplitted(format_schema) + line_offset = input.tell() + line = input.readline() + while True: + if len(line) == 0: + break + if not line.startswith(b"MESSAGE #"): + raise RuntimeError("The line at 0x{line_offset:08X} must start with the text 'MESSAGE #'".format(line_offset=line_offset)) + msg = b"" + while True: + line_offset = input.tell() + line = input.readline() + if line.startswith(b"MESSAGE #") or len(line) == 0: + break + msg += line + with subprocess.Popen(["protoc", + "--encode", format_schema.message_type, format_schema.schemaname], + cwd=format_schema.schemadir, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + shell=False) as proc: + msgbin = proc.communicate(msg)[0] + if proc.returncode != 0: + raise RuntimeError("protoc returned code " + str(proc.returncode)) + write_varint(output, len(msgbin)) + output.write(msgbin) + output.flush() + +def decode_and_check(input, output, format_schema): + input_data = input.read() + output.write(b"Binary representation:\n") + output.flush() + write_hexdump(output, input_data) + output.write(b"\n") + output.flush() + + with tempfile.TemporaryFile() as tmp_input, tempfile.TemporaryFile() as tmp_decoded, tempfile.TemporaryFile() as tmp_encoded: + tmp_input.write(input_data) + tmp_input.flush() + 
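+        # rewind the temporary input so decode() below reads it from the beginning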
tmp_input.seek(0) + decode(tmp_input, tmp_decoded, format_schema) + tmp_decoded.seek(0) + decoded_text = tmp_decoded.read() + output.write(decoded_text) + output.flush() + tmp_decoded.seek(0) + encode(tmp_decoded, tmp_encoded, format_schema) + tmp_encoded.seek(0) + encoded_data = tmp_encoded.read() + + if encoded_data == input_data: + output.write(b"\nBinary representation is as expected\n") + output.flush() + else: + output.write(b"\nBinary representation differs from the expected one (listed below):\n") + output.flush() + write_hexdump(output, encoded_data) + sys.exit(1) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Encodes or decodes length-delimited protobuf messages.') + parser.add_argument('--input', help='The input file, the standard input will be used if not specified.') + parser.add_argument('--output', help='The output file, the standard output will be used if not specified') + parser.add_argument('--format_schema', required=True, help='Format schema in the format "schemafile:MessageType"') + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument('--encode', action='store_true', help='Specify to encode length-delimited messages.' + 'The utility will read text-format messages of the given type from the input and write it in binary to the output.') + group.add_argument('--decode', action='store_true', help='Specify to decode length-delimited messages.' + 'The utility will read messages in binary from the input and write text-format messages to the output.') + group.add_argument('--decode_and_check', action='store_true', help='The same as --decode, and the utility will then encode ' + ' the decoded data back to the binary form to check that the result of that encoding is the same as the input was.') + args = parser.parse_args() + + custom_input_file = None + custom_output_file = None + try: + if args.input: + custom_input_file = open(args.input, "rb") + if args.output: + custom_output_file = open(args.output, "wb") + input = custom_input_file if custom_input_file else sys.stdin.buffer + output = custom_output_file if custom_output_file else sys.stdout.buffer + + if args.encode: + encode(input, output, args.format_schema) + elif args.decode: + decode(input, output, args.format_schema) + elif args.decode_and_check: + decode_and_check(input, output, args.format_schema) + + finally: + if custom_input_file: + custom_input_file.close() + if custom_output_file: + custom_output_file.close() diff --git a/tests/queries/1_stateful/00139_like.sql b/tests/queries/1_stateful/00139_like.sql index ccc195bc81d..8cb84558407 100644 --- a/tests/queries/1_stateful/00139_like.sql +++ b/tests/queries/1_stateful/00139_like.sql @@ -1,4 +1,4 @@ -/* Заметим, что запросы написаны так, как будто пользователь не понимает смысл символа _ в LIKE выражении. */ +/* Note that queries are written as the user doesn't really understand that the symbol _ has special meaning in LIKE pattern. 
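(In a LIKE pattern '_' matches any single character and '%' matches any sequence of characters, so the underscores below act as wildcards rather than literal characters.)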
*/ SELECT count() FROM test.hits WHERE URL LIKE '%/avtomobili_s_probegom/_%__%__%__%'; SELECT count() FROM test.hits WHERE URL LIKE '/avtomobili_s_probegom/_%__%__%__%'; SELECT count() FROM test.hits WHERE URL LIKE '%_/avtomobili_s_probegom/_%__%__%__%'; diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.reference b/tests/queries/1_stateful/00158_cache_dictionary_has.reference index f8d5cd4f53d..ad4bce6bec5 100644 --- a/tests/queries/1_stateful/00158_cache_dictionary_has.reference +++ b/tests/queries/1_stateful/00158_cache_dictionary_has.reference @@ -1,6 +1,6 @@ +100 6410 -6410 -25323 +100 25323 -1774655 +100 1774655 diff --git a/tests/queries/1_stateful/00158_cache_dictionary_has.sql b/tests/queries/1_stateful/00158_cache_dictionary_has.sql index 063e7843fd4..8461728c58e 100644 --- a/tests/queries/1_stateful/00158_cache_dictionary_has.sql +++ b/tests/queries/1_stateful/00158_cache_dictionary_has.sql @@ -6,15 +6,15 @@ CREATE DICTIONARY db_dict.cache_hits PRIMARY KEY WatchID SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'hits' PASSWORD '' DB 'test')) LIFETIME(MIN 300 MAX 600) -LAYOUT(CACHE(SIZE_IN_CELLS 100000 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000)); +LAYOUT(CACHE(SIZE_IN_CELLS 100 QUERY_WAIT_TIMEOUT_MILLISECONDS 600000)); -SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0); +SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 1400 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 1400 == 0; -SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 350 == 0); +SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 350 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 350 == 0; -SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 5 == 0); +SELECT sum(flag) FROM (SELECT dictHas('db_dict.cache_hits', toUInt64(WatchID)) as flag FROM test.hits PREWHERE WatchID % 5 == 0 LIMIT 100); SELECT count() from test.hits PREWHERE WatchID % 5 == 0; DROP DICTIONARY IF EXISTS db_dict.cache_hits; diff --git a/tests/queries/conftest.py b/tests/queries/conftest.py index 2f19ae7c479..40a9a6b3a2e 100644 --- a/tests/queries/conftest.py +++ b/tests/queries/conftest.py @@ -25,6 +25,9 @@ def bin_prefix(cmdopts): prefix = 'clickhouse' if cmdopts['builddir'] is not None: prefix = os.path.join(cmdopts['builddir'], 'programs', prefix) + # FIXME: does this hangs the server start for some reason? 
+ # if not os.path.isabs(prefix): + # prefix = os.path.abspath(prefix) return prefix diff --git a/tests/queries/query_test.py b/tests/queries/query_test.py index c4e7e613175..417a51fe523 100644 --- a/tests/queries/query_test.py +++ b/tests/queries/query_test.py @@ -14,13 +14,10 @@ SKIP_LIST = [ "00987_distributed_stack_overflow", # just fail - "00302_http_compression", - "00463_long_sessions_in_http_interface", "00505_secure", "00505_shard_secure", - "00506_union_distributed", # flaky "00646_url_engine", - "00821_distributed_storage_with_join_on.sql", # flaky + "00725_memory_tracking", # BROKEN "00834_cancel_http_readonly_queries_on_client_close", "00933_test_fix_extra_seek_on_compressed_cache", "00965_logs_level_bugfix", @@ -30,38 +27,34 @@ SKIP_LIST = [ "01014_lazy_database_concurrent_recreate_reattach_and_show_tables", "01018_Distributed__shard_num", "01018_ip_dictionary", - "01023_materialized_view_query_context", # flaky - "01035_lc_empty_part_bug", # flaky - "01037_polygon_dicts_simple_functions.sh", # flaky - "01046_materialized_view_with_join_over_distributed", # flaky "01050_clickhouse_dict_source_with_subquery", "01053_ssd_dictionary", "01054_cache_dictionary_overflow_cell", "01057_http_compression_prefer_brotli", "01080_check_for_error_incorrect_size_of_nested_column", "01083_expressions_in_engine_arguments", - "01086_odbc_roundtrip", + # "01086_odbc_roundtrip", "01088_benchmark_query_id", "01098_temporary_and_external_tables", - "01099_parallel_distributed_insert_select", # flaky + "01099_parallel_distributed_insert_select", "01103_check_cpu_instructions_at_startup", "01114_database_atomic", "01148_zookeeper_path_macros_unfolding", - "01193_metadata_loading.sh", # flaky - "01274_alter_rename_column_distributed", # flaky + "01181_db_atomic_drop_on_cluster", # tcp port in reference "01280_ssd_complex_key_dictionary", "01293_client_interactive_vertical_multiline", # expect-test "01293_client_interactive_vertical_singleline", # expect-test + "01293_system_distribution_queue", # FLAKY "01293_show_clusters", "01294_lazy_database_concurrent_recreate_reattach_and_show_tables", "01294_system_distributed_on_cluster", "01300_client_save_history_when_terminated", # expect-test "01304_direct_io", "01306_benchmark_json", + "01035_lc_empty_part_bug", # FLAKY "01320_create_sync_race_condition_zookeeper", "01355_CSV_input_format_allow_errors", "01370_client_autocomplete_word_break_characters", # expect-test - "01375_storage_file_tsv_csv_with_names_write_prefix", # flaky "01376_GROUP_BY_injective_elimination_dictGet", "01393_benchmark_secure_port", "01418_custom_settings", @@ -72,6 +65,7 @@ SKIP_LIST = [ "01507_clickhouse_server_start_with_embedded_config", "01514_distributed_cancel_query_on_error", "01520_client_print_query_id", # expect-test + "01526_client_start_and_exit", # expect-test "01527_dist_sharding_key_dictGet_reload", "01545_url_file_format_settings", "01553_datetime64_comparison", @@ -79,17 +73,18 @@ SKIP_LIST = [ "01558_ttest_scipy", "01561_mann_whitney_scipy", "01582_distinct_optimization", - "01586_storage_join_low_cardinality_key", - "01599_multiline_input_and_singleline_comments", - "01600_benchmark_query", + "01599_multiline_input_and_singleline_comments", # expect-test "01601_custom_tld", - "01601_proxy_protocol", + "01610_client_spawn_editor", # expect-test + "01676_clickhouse_client_autocomplete", # expect-test (partially) + "01683_text_log_deadlock", # secure tcp ] def check_result(result, error, return_code, reference, replace_map): - for old, new in replace_map.items(): 
- result = result.replace(old.encode('utf-8'), new.encode('utf-8')) + if replace_map: + for old, new in replace_map.items(): + result = result.replace(old.encode('utf-8'), new.encode('utf-8')) if return_code != 0: try: @@ -106,9 +101,9 @@ def check_result(result, error, return_code, reference, replace_map): pytrace=False) -def run_client(bin_prefix, port, query, reference, replace_map={}): +def run_client(bin_prefix, port, database, query, reference, replace_map=None): # We can't use `text=True` since some tests may return binary data - client = subprocess.Popen([bin_prefix + '-client', '--port', str(port), '-m', '-n', '--testmode'], + client = subprocess.Popen([bin_prefix + '-client', '--port', str(port), '-d', database, '-m', '-n', '--testmode'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) result, error = client.communicate(query.encode('utf-8')) assert client.returncode is not None, "Client should exit after processing all queries" @@ -116,12 +111,13 @@ def run_client(bin_prefix, port, query, reference, replace_map={}): check_result(result, error, client.returncode, reference, replace_map) -def run_shell(bin_prefix, server, database, path, reference, replace_map={}): +def run_shell(bin_prefix, server, database, path, reference, replace_map=None): env = { 'CLICKHOUSE_BINARY': bin_prefix, 'CLICKHOUSE_DATABASE': database, 'CLICKHOUSE_PORT_TCP': str(server.tcp_port), 'CLICKHOUSE_PORT_TCP_SECURE': str(server.tcps_port), + 'CLICKHOUSE_PORT_TCP_WITH_PROXY': str(server.proxy_port), 'CLICKHOUSE_PORT_HTTP': str(server.http_port), 'CLICKHOUSE_PORT_INTERSERVER': str(server.inter_port), 'CLICKHOUSE_TMP': server.tmp_dir, @@ -136,6 +132,7 @@ def run_shell(bin_prefix, server, database, path, reference, replace_map={}): def random_str(length=10): alphabet = string.ascii_lowercase + string.digits + random.seed(os.urandom(8)) return ''.join(random.choice(alphabet) for _ in range(length)) @@ -159,17 +156,18 @@ def test_sql_query(bin_prefix, sql_query, standalone_server): reference = file.read() random_name = 'test_{random}'.format(random=random_str()) - query = 'CREATE DATABASE {random}; USE {random}; {query}'.format(random=random_name, query=query) - run_client(bin_prefix, tcp_port, query, reference, {random_name: 'default'}) + run_client(bin_prefix, tcp_port, 'default', 'CREATE DATABASE {random};'.format(random=random_name), b'') + + run_client(bin_prefix, tcp_port, random_name, query, reference, {random_name: 'default'}) query = "SELECT 'SHOW ORPHANED TABLES'; SELECT name FROM system.tables WHERE database != 'system' ORDER BY (database, name);" - run_client(bin_prefix, tcp_port, query, b'SHOW ORPHANED TABLES\n') + run_client(bin_prefix, tcp_port, 'default', query, b'SHOW ORPHANED TABLES\n') query = 'DROP DATABASE {random};'.format(random=random_name) - run_client(bin_prefix, tcp_port, query, b'') + run_client(bin_prefix, tcp_port, 'default', query, b'') query = "SELECT 'SHOW ORPHANED DATABASES'; SHOW DATABASES;" - run_client(bin_prefix, tcp_port, query, b'SHOW ORPHANED DATABASES\ndefault\nsystem\n') + run_client(bin_prefix, tcp_port, 'default', query, b'SHOW ORPHANED DATABASES\ndefault\nsystem\n') def test_shell_query(bin_prefix, shell_query, standalone_server): @@ -191,15 +189,15 @@ def test_shell_query(bin_prefix, shell_query, standalone_server): random_name = 'test_{random}'.format(random=random_str()) query = 'CREATE DATABASE {random};'.format(random=random_name) - run_client(bin_prefix, tcp_port, query, b'') + run_client(bin_prefix, tcp_port, 'default', query, b'') 
run_shell(bin_prefix, standalone_server, random_name, shell_path, reference, {random_name: 'default'}) query = "SELECT 'SHOW ORPHANED TABLES'; SELECT name FROM system.tables WHERE database != 'system' ORDER BY (database, name);" - run_client(bin_prefix, tcp_port, query, b'SHOW ORPHANED TABLES\n') + run_client(bin_prefix, tcp_port, 'default', query, b'SHOW ORPHANED TABLES\n') query = 'DROP DATABASE {random};'.format(random=random_name) - run_client(bin_prefix, tcp_port, query, b'') + run_client(bin_prefix, tcp_port, 'default', query, b'') query = "SELECT 'SHOW ORPHANED DATABASES'; SHOW DATABASES;" - run_client(bin_prefix, tcp_port, query, b'SHOW ORPHANED DATABASES\ndefault\nsystem\n') + run_client(bin_prefix, tcp_port, 'default', query, b'SHOW ORPHANED DATABASES\ndefault\nsystem\n') diff --git a/tests/queries/server.py b/tests/queries/server.py index 599de2400e3..ed12931e658 100644 --- a/tests/queries/server.py +++ b/tests/queries/server.py @@ -37,6 +37,7 @@ class ServerThread(threading.Thread): self.tcps_port = port_base + 4 self.https_port = port_base + 5 self.odbc_port = port_base + 6 + self.proxy_port = port_base + 7 self._args = [ '--config-file={config_path}'.format(config_path=self.server_config), @@ -44,6 +45,7 @@ class ServerThread(threading.Thread): '--tcp_port={tcp_port}'.format(tcp_port=self.tcp_port), '--http_port={http_port}'.format(http_port=self.http_port), '--interserver_http_port={inter_port}'.format(inter_port=self.inter_port), + '--tcp_with_proxy_port={proxy_port}'.format(proxy_port=self.proxy_port), # TODO: SSL certificate is not specified '--tcp_port_secure={tcps_port}'.format(tcps_port=self.tcps_port), ] @@ -76,8 +78,8 @@ class ServerThread(threading.Thread): print('Successful server response:', s.recv(1024)) # FIXME: read whole buffered response s.shutdown(socket.SHUT_RDWR) s.close() - except Exception as e: - print('Failed to connect to server:', e, file=sys.stderr) + except Exception: + # Failed to connect to server - try again continue else: break @@ -96,6 +98,10 @@ class ServerThread(threading.Thread): self._lock.release() + if not retries: + print('Failed to start server', file=sys.stderr) + return + while self._proc.returncode is None: self._proc.communicate() @@ -297,6 +303,10 @@ ServerThread.DEFAULT_SERVER_CONFIG = \ testkeeper + + /clickhouse/task_queue/ddl + + system part_log
@@ -1112,6 +1122,136 @@ ServerThread.DEFAULT_DICTIONARIES_CONFIG = \ + + + simple_executable_cache_dictionary_no_implicit_key + + + id + UInt64 + + + + value + String + + + + + + echo "1\tValue" + TabSeparated + false + + + + + 10000 + + + 300 + + + + simple_executable_cache_dictionary_implicit_key + + + id + UInt64 + + + + value + String + + + + + + echo "Value" + TabSeparated + true + + + + + 10000 + + + 300 + + + + complex_executable_cache_dictionary_no_implicit_key + + + + id + UInt64 + + + + id_key + String + + + + + value + String + + + + + + echo "1\tFirstKey\tValue" + TabSeparated + false + + + + + 10000 + + + 300 + + + + complex_executable_cache_dictionary_implicit_key + + + + id + UInt64 + + + + id_key + String + + + + + value + String + + + + + + echo "Value" + TabSeparated + true + + + + + 10000 + + + 300 + """ diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index 0ca2cee3c77..d20b5669cc5 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -8,9 +8,12 @@ export CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL [ -v CLICKHOUSE_CONFIG_CLIENT ] && CLICKHOUSE_CLIENT_OPT0+=" --config-file=${CLICKHOUSE_CONFIG_CLIENT} " [ -v CLICKHOUSE_HOST ] && CLICKHOUSE_CLIENT_OPT0+=" --host=${CLICKHOUSE_HOST} " [ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_CLIENT_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} " +[ -v CLICKHOUSE_PORT_TCP ] && CLICKHOUSE_BENCHMARK_OPT0+=" --port=${CLICKHOUSE_PORT_TCP} " [ -v CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL ] && CLICKHOUSE_CLIENT_OPT0+=" --send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL} " [ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_CLIENT_OPT0+=" --database=${CLICKHOUSE_DATABASE} " +[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_CLIENT_OPT0+=" --log_comment='${CLICKHOUSE_LOG_COMMENT}' " [ -v CLICKHOUSE_DATABASE ] && CLICKHOUSE_BENCHMARK_OPT0+=" --database=${CLICKHOUSE_DATABASE} " +[ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_BENCHMARK_OPT0+=" --log_comment='${CLICKHOUSE_LOG_COMMENT}' " export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"} [ -x "$CLICKHOUSE_BINARY-client" ] && CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY-client} @@ -51,14 +54,18 @@ export CLICKHOUSE_PORT_HTTP=${CLICKHOUSE_PORT_HTTP:="8123"} export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:=$(${CLICKHOUSE_EXTRACT_CONFIG} --try --key=https_port 2>/dev/null)} 2>/dev/null export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:="8443"} export CLICKHOUSE_PORT_HTTP_PROTO=${CLICKHOUSE_PORT_HTTP_PROTO:="http"} +export CLICKHOUSE_PORT_MYSQL=${CLICKHOUSE_PORT_MYSQL:=$(${CLICKHOUSE_EXTRACT_CONFIG} --try --key=mysql_port 2>/dev/null)} 2>/dev/null +export CLICKHOUSE_PORT_MYSQL=${CLICKHOUSE_PORT_MYSQL:="9004"} -# Add database to url params +# Add database and log comment to url params if [ -v CLICKHOUSE_URL_PARAMS ] then export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&database=${CLICKHOUSE_DATABASE}" else export CLICKHOUSE_URL_PARAMS="database=${CLICKHOUSE_DATABASE}" fi +# Note: missing url encoding of the log comment. 
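+# (So keep CLICKHOUSE_LOG_COMMENT free of characters that would need escaping in a URL, such as spaces, '&' or '#'.)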
+[ -v CLICKHOUSE_LOG_COMMENT ] && export CLICKHOUSE_URL_PARAMS="${CLICKHOUSE_URL_PARAMS}&log_comment=${CLICKHOUSE_LOG_COMMENT}" export CLICKHOUSE_URL=${CLICKHOUSE_URL:="${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/"} export CLICKHOUSE_URL_HTTPS=${CLICKHOUSE_URL_HTTPS:="https://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTPS}/"} @@ -82,6 +89,17 @@ export CLICKHOUSE_CURL=${CLICKHOUSE_CURL:="${CLICKHOUSE_CURL_COMMAND} -q -s --ma export CLICKHOUSE_TMP=${CLICKHOUSE_TMP:="."} mkdir -p ${CLICKHOUSE_TMP} +export MYSQL_CLIENT_BINARY=${MYSQL_CLIENT_BINARY:="mysql"} +export MYSQL_CLIENT_CLICKHOUSE_USER=${MYSQL_CLIENT_CLICKHOUSE_USER:="default"} +# Avoids "Can't connect to local MySQL server through socket '/var/run/mysqld/mysqld.sock'" when connecting to localhost +[ -v CLICKHOUSE_HOST ] && MYSQL_CLIENT_OPT0+=" --protocol tcp " +[ -v CLICKHOUSE_HOST ] && MYSQL_CLIENT_OPT0+=" --host ${CLICKHOUSE_HOST} " +[ -v CLICKHOUSE_PORT_MYSQL ] && MYSQL_CLIENT_OPT0+=" --port ${CLICKHOUSE_PORT_MYSQL} " +[ -v CLICKHOUSE_DATABASE ] && MYSQL_CLIENT_OPT0+=" --database ${CLICKHOUSE_DATABASE} " +MYSQL_CLIENT_OPT0+=" --user ${MYSQL_CLIENT_CLICKHOUSE_USER} " +export MYSQL_CLIENT_OPT="${MYSQL_CLIENT_OPT0:-} ${MYSQL_CLIENT_OPT:-}" +export MYSQL_CLIENT=${MYSQL_CLIENT:="$MYSQL_CLIENT_BINARY ${MYSQL_CLIENT_OPT:-}"} + function clickhouse_client_removed_host_parameter() { # removing only `--host=value` and `--host value` (removing '-hvalue' feels to dangerous) with python regex. diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 8ed1e890cf1..39ec8bac3cf 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -17,7 +17,8 @@ "functions_bad_arguments", /// Too long for TSan "01603_read_with_backoff_bug", /// Too long for TSan "01646_system_restart_replicas_smoke", /// RESTART REPLICAS can acquire too much locks, while only 64 is possible from one thread under TSan - "01641_memory_tracking_insert_optimize" /// INSERT lots of rows is too heavy for TSan + "01641_memory_tracking_insert_optimize", /// INSERT lots of rows is too heavy for TSan + "01017_uniqCombined_memory_usage" /// Fine thresholds on memory usage ], "address-sanitizer": [ "00877", @@ -27,7 +28,8 @@ "01103_check_cpu_instructions_at_startup", "01473_event_time_microseconds", "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers - "01193_metadata_loading" + "01193_metadata_loading", + "01017_uniqCombined_memory_usage" /// Fine thresholds on memory usage ], "ub-sanitizer": [ "capnproto", @@ -48,7 +50,8 @@ "00877_memory_limit_for_new_delete", /// memory limits don't work correctly under msan because it replaces malloc/free "01473_event_time_microseconds", "01526_max_untracked_memory", /// requires TraceCollector, does not available under sanitizers - "01193_metadata_loading" + "01193_metadata_loading", + "01017_uniqCombined_memory_usage" /// Fine thresholds on memory usage ], "debug-build": [ "query_profiler", @@ -90,6 +93,7 @@ "01300_client_save_history_when_terminated", "orc_output", "01370_client_autocomplete_word_break_characters", + "01676_clickhouse_client_autocomplete", "01193_metadata_loading", "01455_time_zones" ], @@ -101,8 +105,165 @@ "00510_materizlized_view_and_deduplication_zookeeper", "00738_lock_for_inner_table" ], + "database-replicated": [ + /// Tests with DETACH TABLE (it's not allowed) + /// and tests with SET (session and query settings are not supported) + "memory_tracking", + "memory_usage", + "live_view", + 
"01413_alter_update_supertype", + "01149_zookeeper_mutation_stuck_after_replace_partition", + "00836_indices_alter_replicated_zookeeper", + "00652_mutations_alter_update", + "01715_tuple_insert_null_as_default", + "00825_protobuf_format_map", + "00152_insert_different_granularity", + "01715_background_checker_blather_zookeeper", + "01714_alter_drop_version", + "01114_materialize_clear_index_compact_parts", + "00814_replicated_minimalistic_part_header_zookeeper", + "01188_attach_table_from_pat", + "01415_sticking_mutations", + "01130_in_memory_parts", + "01110_dictionary_layout_without_arguments", + "01018_ddl_dictionaries_create", + "01018_ddl_dictionaries_select", + "01414_freeze_does_not_prevent_alters", + "01018_ddl_dictionaries_bad_queries", + "01686_rocksdb", + "01550_mutation_subquery", + "01070_mutations_with_dependencies", + "01070_materialize_ttl", + "01055_compact_parts", + "01017_mutations_with_nondeterministic_functions_zookeeper", + "00926_adaptive_index_granularity_pk", + "00910_zookeeper_test_alter_compression_codecs", + "00908_bloom_filter_index", + "00616_final_single_part", + "00446_clear_column_in_partition_zookeeper", + "01533_multiple_nested", + "01213_alter_rename_column_zookeeper", + "01575_disable_detach_table_of_dictionary", + "01457_create_as_table_function_structure", + "01415_inconsistent_merge_tree_settings", + "01413_allow_non_metadata_alters", + "01378_alter_rename_with_ttl_zookeeper", + "01349_mutation_datetime_key", + "01325_freeze_mutation_stuck", + "01272_suspicious_codecs", + "01181_db_atomic_drop_on_cluster", + "00957_delta_diff_bug", + "00910_zookeeper_custom_compression_codecs_replicated", + "00899_long_attach_memory_limit", + "00804_test_custom_compression_codes_log_storages", + "00804_test_alter_compression_codecs", + "00804_test_delta_codec_no_type_alter", + "00804_test_custom_compression_codecs", + "00753_alter_attach", + "00715_fetch_merged_or_mutated_part_zookeeper", + "00688_low_cardinality_serialization", + "01575_disable_detach_table_of_dictionary", + "00738_lock_for_inner_table", + "01666_blns", + "01652_ignore_and_low_cardinality", + "01651_map_functions", + "01650_fetch_patition_with_macro_in_zk_path", + "01648_mutations_and_escaping", + "01640_marks_corruption_regression", + "01622_byte_size", + "01611_string_to_low_cardinality_key_alter", + "01602_show_create_view", + "01600_log_queries_with_extensive_info", + "01560_ttl_remove_empty_parts", + "01554_bloom_filter_index_big_integer_uuid", + "01550_type_map_formats_input", + "01550_type_map_formats", + "01550_create_map_type", + "01532_primary_key_without_order_by_zookeeper", + "01511_alter_version_versioned_collapsing_merge_tree_zookeeper", + "01509_parallel_quorum_insert_no_replicas", + "01504_compression_multiple_streams", + "01494_storage_join_persistency", + "01493_storage_set_persistency", + "01493_alter_remove_properties_zookeeper", + "01475_read_subcolumns_storages", + "01475_read_subcolumns", + "01451_replicated_detach_drop_part", + "01451_detach_drop_part", + "01440_big_int_exotic_casts", + "01430_modify_sample_by_zookeeper", + "01417_freeze_partition_verbose_zookeeper", + "01417_freeze_partition_verbose", + "01396_inactive_replica_cleanup_nodes_zookeeper", + "01375_compact_parts_codecs", + "01357_version_collapsing_attach_detach_zookeeper", + "01355_alter_column_with_order", + "01291_geo_types", + "01270_optimize_skip_unused_shards_low_cardinality", + "01182_materialized_view_different_structure", + "01150_ddl_guard_rwr", + "01148_zookeeper_path_macros_unfolding", + 
"01135_default_and_alter_zookeeper", + "01130_in_memory_parts_partitons", + "01127_month_partitioning_consistency_select", + "01114_database_atomic", + "01083_expressions_in_engine_arguments", + "01073_attach_if_not_exists", + "01072_optimize_skip_unused_shards_const_expr_eval", + "01071_prohibition_secondary_index_with_old_format_merge_tree", + "01062_alter_on_mutataion_zookeeper", + "01060_shutdown_table_after_detach", + "01056_create_table_as", + "01035_avg", + "01021_only_tuple_columns", + "01019_alter_materialized_view_query", + "01019_alter_materialized_view_consistent", + "01019_alter_materialized_view_atomic", + "01015_attach_part", + "00989_parallel_parts_loading", + "00980_zookeeper_merge_tree_alter_settings", + "00980_merge_alter_settings", + "00955_test_final_mark", + "00933_reserved_word", + "00926_zookeeper_adaptive_index_granularity_replicated_merge_tree", + "00926_adaptive_index_granularity_replacing_merge_tree", + "00926_adaptive_index_granularity_merge_tree", + "00925_zookeeper_empty_replicated_merge_tree_optimize_final", + "00800_low_cardinality_distinct_numeric", + "00754_alter_modify_order_by_replicated_zookeeper", + "00751_low_cardinality_nullable_group_by", + "00751_default_databasename_for_view", + "00719_parallel_ddl_table", + "00718_low_cardinaliry_alter", + "00717_low_cardinaliry_distributed_group_by", + "00688_low_cardinality_syntax", + "00688_low_cardinality_nullable_cast", + "00688_low_cardinality_in", + "00652_replicated_mutations_zookeeper", + "00634_rename_view", + "00626_replace_partition_from_table", + "00625_arrays_in_nested", + "00623_replicated_truncate_table_zookeeper", + "00619_union_highlite", + "00599_create_view_with_subquery", + "00571_non_exist_database_when_create_materializ_view", + "00553_buff_exists_materlized_column", + "00516_deduplication_after_drop_partition_zookeeper", + "00508_materialized_view_to", + "00446_clear_column_in_partition_concurrent_zookeeper", + "00423_storage_log_single_thread", + "00311_array_primary_key", + "00236_replicated_drop_on_non_leader_zookeeper", + "00226_zookeeper_deduplication_and_unexpected_parts", + "00215_primary_key_order_zookeeper", + "00180_attach_materialized_view", + "00121_drop_column_zookeeper", + "00116_storage_set", + "00083_create_merge_tree_zookeeper", + "00062_replicated_merge_tree_alter_zookeeper" + ], "polymorphic-parts": [ - "01508_partition_pruning", /// bug, shoud be fixed + "01508_partition_pruning_long", /// bug, shoud be fixed "01482_move_to_prewhere_and_cast" /// bug, shoud be fixed ], "antlr": [ @@ -121,6 +282,12 @@ "00763_create_query_as_table_engine_bug", "00765_sql_compatibility_aliases", "00825_protobuf_format_input", + "00825_protobuf_format_nested_optional", + "00825_protobuf_format_array_3dim", + "00825_protobuf_format_map", + "00825_protobuf_format_array_of_arrays", + "00825_protobuf_format_table_default", + "00825_protobuf_format_enum_mapping", "00826_cross_to_inner_join", "00834_not_between", "00909_kill_not_initialized_query", @@ -151,6 +318,7 @@ "01015_attach_part", "01015_database_bad_tables", "01017_uniqCombined_memory_usage", + "01018_ddl_dictionaries_concurrent_requrests", /// Cannot parse ATTACH DICTIONARY IF NOT EXISTS "01019_alter_materialized_view_atomic", "01019_alter_materialized_view_consistent", "01019_alter_materialized_view_query", @@ -266,7 +434,7 @@ "01501_clickhouse_client_INSERT_exception", "01504_compression_multiple_streams", "01508_explain_header", - "01508_partition_pruning", + "01508_partition_pruning_long", "01509_check_parallel_quorum_inserts", 
"01509_parallel_quorum_and_merge", "01515_mv_and_array_join_optimisation_bag", @@ -280,7 +448,7 @@ "01530_drop_database_atomic_sync", "01532_execute_merges_on_single_replica", "01532_primary_key_without_order_by_zookeeper", - "01541_max_memory_usage_for_user", + "01541_max_memory_usage_for_user_long", "01551_mergetree_read_in_order_spread", "01552_dict_fixedstring", "01554_bloom_filter_index_big_integer_uuid", @@ -310,18 +478,23 @@ "01642_if_nullable_regression", "01643_system_suspend", "01655_plan_optimizations", - "01475_read_subcolumns_storages" + "01475_read_subcolumns_storages", + "01674_clickhouse_client_query_param_cte", + "01666_merge_tree_max_query_limit" ], "parallel": [ /// Pessimistic list of tests which work badly in parallel. /// Probably they need better investigation. "00062_replicated_merge_tree_alter_zookeeper", + "00080_show_tables_and_system_tables", + "00101_materialized_views_and_insert_without_explicit_database", "00109_shard_totals_after_having", "00110_external_sort", "00116_storage_set", "00121_drop_column_zookeeper", "00133_long_shard_memory_tracker_and_exception_safety", + "00158_buffer_and_nonexistent_table", "00180_attach_materialized_view", "00226_zookeeper_deduplication_and_unexpected_parts", "00236_replicated_drop_on_non_leader_zookeeper", @@ -342,13 +515,17 @@ "00571_non_exist_database_when_create_materializ_view", "00575_illegal_column_exception_when_drop_depen_column", "00599_create_view_with_subquery", + "00604_show_create_database", "00612_http_max_query_size", "00619_union_highlite", "00620_optimize_on_nonleader_replica_zookeeper", + "00623_truncate_table", + "00623_truncate_table_throw_exception", "00625_arrays_in_nested", "00626_replace_partition_from_table", "00626_replace_partition_from_table_zookeeper", "00633_materialized_view_and_too_many_parts_zookeeper", + "00643_cast_zookeeper", "00652_mergetree_mutations", "00652_replicated_mutations_zookeeper", "00682_empty_parts_merge", @@ -357,20 +534,28 @@ "00699_materialized_view_mutations", "00701_rollup", "00715_fetch_merged_or_mutated_part_zookeeper", + "00716_allow_ddl", + "00719_parallel_ddl_db", + "00740_database_in_nested_view", + "00741_client_comment_multiline", "00751_default_databasename_for_view", "00753_alter_attach", "00754_alter_modify_column_partitions", "00754_alter_modify_order_by_replicated_zookeeper", "00763_long_lock_buffer_alter_destination_table", + "00800_versatile_storage_join", "00804_test_alter_compression_codecs", "00804_test_custom_compression_codecs", "00804_test_custom_compression_codes_log_storages", "00804_test_delta_codec_compression", + "00815_left_join_on_stepanel", "00834_cancel_http_readonly_queries_on_client_close", "00834_kill_mutation", "00834_kill_mutation_replicated_zookeeper", "00840_long_concurrent_select_and_drop_deadlock", + "00857_global_joinsavel_table_alias", "00899_long_attach_memory_limit", + "00910_buffer_prewhere", "00910_zookeeper_custom_compression_codecs_replicated", "00926_adaptive_index_granularity_merge_tree", "00926_adaptive_index_granularity_pk", @@ -388,49 +573,94 @@ "00988_constraints_replication_zookeeper", "00989_parallel_parts_loading", "00993_system_parts_race_condition_drop_zookeeper", + "01012_show_tables_limit", "01013_sync_replica_timeout_zookeeper", + "01014_lazy_database_basic", "01014_lazy_database_concurrent_recreate_reattach_and_show_tables", "01015_attach_part", + "01015_database_bad_tables", "01018_ddl_dictionaries_concurrent_requrests", "01018_ddl_dictionaries_create", "01018_ddl_dictionaries_select", + 
"01018_ddl_dictionaries_special", + "01018_dictionaries_from_dictionaries", + "01018_ip_dictionary", "01021_only_tuple_columns", + "01023_materialized_view_query_context", "01031_mutations_interpreter_and_context", "01033_dictionaries_lifetime", "01035_concurrent_move_partition_from_table_zookeeper", + "01036_no_superfluous_dict_reload_on_create_database", + "01036_no_superfluous_dict_reload_on_create_database_2", + "01037_polygon_dicts_correctness_all", + "01037_polygon_dicts_correctness_fast", + "01037_polygon_dicts_simple_functions", + "01038_dictionary_lifetime_min_zero_sec", + "01040_dictionary_invalidate_query_switchover_long", + "01041_create_dictionary_if_not_exists", + "01042_system_reload_dictionary_reloads_completely", + "01043_dictionary_attribute_properties_values", + "01045_dictionaries_restrictions", "01045_zookeeper_system_mutations_with_parts_names", + "01048_exists_query", + "01053_drop_database_mat_view", "01053_ssd_dictionary", + "01054_cache_dictionary_bunch_update", + "01054_cache_dictionary_overflow_cell", "01055_compact_parts_1", + "01056_create_table_as", "01060_avro", "01060_shutdown_table_after_detach", + "01069_database_memory", "01070_materialize_ttl", "01070_modify_ttl", "01070_mutations_with_dependencies", "01071_live_view_detach_dependency", "01071_prohibition_secondary_index_with_old_format_merge_tree", "01073_attach_if_not_exists", + "01073_show_tables_not_like", + "01076_cache_dictionary_datarace_exception_ptr", "01076_parallel_alter_replicated_zookeeper", "01079_parallel_alter_add_drop_column_zookeeper", "01079_parallel_alter_detach_table_zookeeper", + "01080_check_for_error_incorrect_size_of_nested_column", "01083_expressions_in_engine_arguments", + "01084_regexp_empty", "01085_max_distributed_connections_http", "01092_memory_profiler", "01098_temporary_and_external_tables", + "01103_distributed_product_mode_local_column_renames", "01107_atomic_db_detach_attach", "01108_restart_replicas_rename_deadlock_zookeeper", + "01109_exchange_tables", "01110_dictionary_layout_without_arguments", + "01113_local_dictionary_type_conversion", "01114_database_atomic", + "01114_mysql_database_engine_segfault", + "01115_join_with_dictionary", + "01125_dict_ddl_cannot_add_column", "01127_month_partitioning_consistency_select", "01130_in_memory_parts_partitons", "01135_default_and_alter_zookeeper", "01148_zookeeper_path_macros_unfolding", + "01150_ddl_guard_rwr", + "01185_create_or_replace_table", "01190_full_attach_syntax", + "01191_rename_dictionary", + "01192_rename_database_zookeeper", "01193_metadata_loading", "01200_mutations_memory_consumption", + "01224_no_superfluous_dict_reload", + "01225_drop_dictionary_as_table", + "01225_show_create_table_from_dictionary", + "01231_distributed_aggregation_memory_efficient_mix_levels", + "01232_extremes", "01238_http_memory_tracking", "01249_bad_arguments_for_bloom_filter", "01251_dict_is_in_infinite_loop", + "01254_dict_create_without_db", "01254_dict_load_after_detach_attach", + "01257_dictionary_mismatch_types", "01259_dictionary_custom_settings_ddl", "01267_alter_default_key_columns_zookeeper", "01268_dictionary_direct_layout", @@ -444,18 +674,26 @@ "01293_system_distribution_queue", "01294_lazy_database_concurrent", "01294_lazy_database_concurrent_recreate_reattach_and_show_tables", + "01294_system_distributed_on_cluster", + "01296_create_row_policy_in_current_database", + "01297_create_quota", "01305_replica_create_drop_zookeeper", "01307_multiple_leaders_zookeeper", "01318_long_unsuccessful_mutation_zookeeper", 
"01319_manual_write_to_replicas", + "01320_create_sync_race_condition_zookeeper", "01338_long_select_and_alter", "01338_long_select_and_alter_zookeeper", "01355_alter_column_with_order", "01355_ilike", "01357_version_collapsing_attach_detach_zookeeper", "01375_compact_parts_codecs", + "01376_GROUP_BY_injective_elimination_dictGet", "01378_alter_rename_with_ttl_zookeeper", + "01383_remote_ambiguous_column_shard", "01388_clear_all_columns", + "01391_join_on_dict_crash", + "01392_column_resolve", "01396_inactive_replica_cleanup_nodes_zookeeper", "01412_cache_dictionary_race", "01414_mutations_and_errors_zookeeper", @@ -464,20 +702,48 @@ "01417_freeze_partition_verbose", "01417_freeze_partition_verbose_zookeeper", "01430_modify_sample_by_zookeeper", + "01444_create_table_drop_database_race", "01454_storagememory_data_race_challenge", + "01455_rank_correlation_spearman", "01456_modify_column_type_via_add_drop_update", "01457_create_as_table_function_structure", "01459_manual_write_to_replicas", "01460_DistributedFilesToInsert", "01465_ttl_recompression", + "01470_show_databases_like", "01471_calculate_ttl_during_merge", + "01487_distributed_in_not_default_db", "01493_alter_remove_properties_zookeeper", "01493_storage_set_persistency", "01494_storage_join_persistency", + "01501_cache_dictionary_all_fields", + "01507_clickhouse_server_start_with_embedded_config", + "01509_dictionary_preallocate", + "01516_create_table_primary_key", "01516_drop_table_stress", - "01541_max_memory_usage_for_user", - "01646_system_restart_replicas_smoke", // system restart replicas is a global query + "01517_drop_mv_with_inner_table", + "01526_complex_key_dict_direct_layout", + "01527_clickhouse_local_optimize", + "01527_dist_sharding_key_dictGet_reload", + "01530_drop_database_atomic_sync", + "01541_max_memory_usage_for_user_long", + "01542_dictionary_load_exception_race", + "01575_disable_detach_table_of_dictionary", + "01593_concurrent_alter_mutations_kill", + "01593_concurrent_alter_mutations_kill_many_replicas", "01600_count_of_parts_metrics", // tests global system metrics + "01600_detach_permanently", + "01600_log_queries_with_extensive_info", + "01600_multiple_left_join_with_aliases", + "01601_detach_permanently", + "01602_show_create_view", + "01603_rename_overwrite_bug", + "01646_system_restart_replicas_smoke", // system restart replicas is a global query + "01656_test_query_log_factories_info", + "01669_columns_declaration_serde", + "01676_dictget_in_default_expression", + "01700_system_zookeeper_path_in", + "01715_background_checker_blather_zookeeper", "attach", "ddl_dictionaries", "dictionary", @@ -485,6 +751,8 @@ "live_view", "memory_leak", "memory_limit", - "polygon_dicts" // they use an explicitly specified database + "polygon_dicts", // they use an explicitly specified database + "01658_read_file_to_stringcolumn", + "01721_engine_file_truncate_on_insert" // It's ok to execute in parallel but not several instances of the same test. 
] } diff --git a/tests/testflows/aes_encryption/docker-compose/docker-compose.yml b/tests/testflows/aes_encryption/docker-compose/docker-compose.yml index 04a51ad7ec0..124b53bf502 100644 --- a/tests/testflows/aes_encryption/docker-compose/docker-compose.yml +++ b/tests/testflows/aes_encryption/docker-compose/docker-compose.yml @@ -56,7 +56,7 @@ services: zookeeper: condition: service_healthy - # dummy service which does nothing, but allows to postpone + # dummy service which does nothing, but allows to postpone # 'docker-compose up -d' till all dependecies will go healthy all_services_ready: image: hello-world diff --git a/tests/testflows/example/docker-compose/docker-compose.yml b/tests/testflows/example/docker-compose/docker-compose.yml index e7e57386dc4..4edb415824f 100644 --- a/tests/testflows/example/docker-compose/docker-compose.yml +++ b/tests/testflows/example/docker-compose/docker-compose.yml @@ -20,7 +20,7 @@ services: zookeeper: condition: service_healthy - # dummy service which does nothing, but allows to postpone + # dummy service which does nothing, but allows to postpone # 'docker-compose up -d' till all dependecies will go healthy all_services_ready: image: hello-world diff --git a/tests/testflows/helpers/cluster.py b/tests/testflows/helpers/cluster.py index 3be79132ec3..5e8717e7a8e 100755 --- a/tests/testflows/helpers/cluster.py +++ b/tests/testflows/helpers/cluster.py @@ -26,7 +26,7 @@ class Node(object): def repr(self): return f"Node(name='{self.name}')" - def restart(self, timeout=300, safe=True): + def restart(self, timeout=300, retries=5): """Restart node. """ with self.cluster.lock: @@ -35,15 +35,20 @@ class Node(object): shell = self.cluster._bash.pop(key) shell.__exit__(None, None, None) - self.cluster.command(None, f'{self.cluster.docker_compose} restart {self.name}', timeout=timeout) + for retry in range(retries): + r = self.cluster.command(None, f'{self.cluster.docker_compose} restart {self.name}', timeout=timeout) + if r.exitcode == 0: + break - def start(self, timeout=300, safe=True): + def start(self, timeout=300, retries=5): """Start node. """ - self.cluster.command(None, f'{self.cluster.docker_compose} start {self.name}', timeout=timeout) + for retry in range(retries): + r = self.cluster.command(None, f'{self.cluster.docker_compose} start {self.name}', timeout=timeout) + if r.exitcode == 0: + break - - def stop(self, timeout=300, safe=True): + def stop(self, timeout=300, retries=5): """Stop node. """ with self.cluster.lock: @@ -52,7 +57,10 @@ class Node(object): shell = self.cluster._bash.pop(key) shell.__exit__(None, None, None) - self.cluster.command(None, f'{self.cluster.docker_compose} stop {self.name}', timeout=timeout) + for retry in range(retries): + r = self.cluster.command(None, f'{self.cluster.docker_compose} stop {self.name}', timeout=timeout) + if r.exitcode == 0: + break def command(self, *args, **kwargs): return self.cluster.command(self.name, *args, **kwargs) @@ -71,7 +79,7 @@ class ClickHouseNode(Node): continue assert False, "container is not healthy" - def stop(self, timeout=300, safe=True): + def stop(self, timeout=300, safe=True, retries=5): """Stop node. 
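        Retries the docker-compose stop command up to `retries` times until it exits with code 0.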
""" if safe: @@ -89,17 +97,23 @@ class ClickHouseNode(Node): shell = self.cluster._bash.pop(key) shell.__exit__(None, None, None) - self.cluster.command(None, f'{self.cluster.docker_compose} stop {self.name}', timeout=timeout) + for retry in range(retries): + r = self.cluster.command(None, f'{self.cluster.docker_compose} stop {self.name}', timeout=timeout) + if r.exitcode == 0: + break - def start(self, timeout=300, wait_healthy=True): + def start(self, timeout=300, wait_healthy=True, retries=5): """Start node. """ - self.cluster.command(None, f'{self.cluster.docker_compose} start {self.name}', timeout=timeout) + for retry in range(retries): + r = self.cluster.command(None, f'{self.cluster.docker_compose} start {self.name}', timeout=timeout) + if r.exitcode == 0: + break if wait_healthy: self.wait_healthy(timeout) - def restart(self, timeout=300, safe=True, wait_healthy=True): + def restart(self, timeout=300, safe=True, wait_healthy=True, retries=5): """Restart node. """ if safe: @@ -117,7 +131,10 @@ class ClickHouseNode(Node): shell = self.cluster._bash.pop(key) shell.__exit__(None, None, None) - self.cluster.command(None, f'{self.cluster.docker_compose} restart {self.name}', timeout=timeout) + for retry in range(retries): + r = self.cluster.command(None, f'{self.cluster.docker_compose} restart {self.name}', timeout=timeout) + if r.exitcode == 0: + break if wait_healthy: self.wait_healthy(timeout) diff --git a/tests/testflows/ldap/authentication/docker-compose/docker-compose.yml b/tests/testflows/ldap/authentication/docker-compose/docker-compose.yml index c8ff683df58..36e25ef766e 100644 --- a/tests/testflows/ldap/authentication/docker-compose/docker-compose.yml +++ b/tests/testflows/ldap/authentication/docker-compose/docker-compose.yml @@ -135,7 +135,7 @@ services: zookeeper: condition: service_healthy - # dummy service which does nothing, but allows to postpone + # dummy service which does nothing, but allows to postpone # 'docker-compose up -d' till all dependecies will go healthy all_services_ready: image: hello-world diff --git a/tests/testflows/ldap/authentication/docker-compose/openldap-service.yml b/tests/testflows/ldap/authentication/docker-compose/openldap-service.yml index 139907c513c..e489637b8c9 100644 --- a/tests/testflows/ldap/authentication/docker-compose/openldap-service.yml +++ b/tests/testflows/ldap/authentication/docker-compose/openldap-service.yml @@ -28,7 +28,7 @@ services: environment: PHPLDAPADMIN_HTTPS=false: ports: - - "8080:80" + - "8080:80" healthcheck: test: echo 1 interval: 10s @@ -37,4 +37,3 @@ services: start_period: 300s security_opt: - label:disable - diff --git a/tests/testflows/ldap/authentication/tests/common.py b/tests/testflows/ldap/authentication/tests/common.py index 8efb389a23f..7f9f16e827c 100644 --- a/tests/testflows/ldap/authentication/tests/common.py +++ b/tests/testflows/ldap/authentication/tests/common.py @@ -270,7 +270,7 @@ def ldap_authenticated_users(*users, config_d_dir="/etc/clickhouse-server/users. config = create_ldap_users_config_content(*users, config_d_dir=config_d_dir, config_file=config_file) return add_config(config, restart=restart) -def invalid_server_config(servers, message=None, tail=13, timeout=60): +def invalid_server_config(servers, message=None, tail=30, timeout=60): """Check that ClickHouse errors when trying to load invalid LDAP servers configuration file. 
""" node = current().context.node @@ -299,7 +299,7 @@ def invalid_server_config(servers, message=None, tail=13, timeout=60): with By("removing the config file", description=config.path): node.command(f"rm -rf {config.path}", exitcode=0) -def invalid_user_config(servers, config, message=None, tail=13, timeout=60): +def invalid_user_config(servers, config, message=None, tail=30, timeout=60): """Check that ClickHouse errors when trying to load invalid LDAP users configuration file. """ node = current().context.node diff --git a/tests/testflows/ldap/authentication/tests/server_config.py b/tests/testflows/ldap/authentication/tests/server_config.py index 38ec859226b..4053b5f61ed 100644 --- a/tests/testflows/ldap/authentication/tests/server_config.py +++ b/tests/testflows/ldap/authentication/tests/server_config.py @@ -245,7 +245,7 @@ def invalid_verification_cooldown_value(self, invalid_value, timeout=20): }} with When("I try to use this configuration then it should not work"): - invalid_server_config(servers, message=error_message, tail=17, timeout=timeout) + invalid_server_config(servers, message=error_message, tail=30, timeout=timeout) @TestScenario @Requirements( diff --git a/tests/testflows/ldap/authentication/tests/user_config.py b/tests/testflows/ldap/authentication/tests/user_config.py index 36ed33ed17a..0f296ea31c6 100644 --- a/tests/testflows/ldap/authentication/tests/user_config.py +++ b/tests/testflows/ldap/authentication/tests/user_config.py @@ -39,7 +39,7 @@ def empty_server_name(self, timeout=20): "message": "DB::Exception: user1: Authentication failed: password is incorrect or there is no user with such name" }] config = create_ldap_users_config_content(*users) - invalid_user_config(servers, config, message=message, tail=15, timeout=timeout) + invalid_user_config(servers, config, message=message, tail=30, timeout=timeout) @TestScenario @Requirements( @@ -147,7 +147,7 @@ def ldap_and_password(self): error_message = "DB::Exception: More than one field of 'password'" with Then("I expect an error when I try to load the configuration file", description=error_message): - invalid_user_config(servers, new_config, message=error_message, tail=16) + invalid_user_config(servers, new_config, message=error_message, tail=30) @TestFeature @Name("user config") diff --git a/tests/testflows/ldap/external_user_directory/docker-compose/docker-compose.yml b/tests/testflows/ldap/external_user_directory/docker-compose/docker-compose.yml index c8ff683df58..36e25ef766e 100644 --- a/tests/testflows/ldap/external_user_directory/docker-compose/docker-compose.yml +++ b/tests/testflows/ldap/external_user_directory/docker-compose/docker-compose.yml @@ -135,7 +135,7 @@ services: zookeeper: condition: service_healthy - # dummy service which does nothing, but allows to postpone + # dummy service which does nothing, but allows to postpone # 'docker-compose up -d' till all dependecies will go healthy all_services_ready: image: hello-world diff --git a/tests/testflows/ldap/external_user_directory/docker-compose/openldap-service.yml b/tests/testflows/ldap/external_user_directory/docker-compose/openldap-service.yml index 139907c513c..e489637b8c9 100644 --- a/tests/testflows/ldap/external_user_directory/docker-compose/openldap-service.yml +++ b/tests/testflows/ldap/external_user_directory/docker-compose/openldap-service.yml @@ -28,7 +28,7 @@ services: environment: PHPLDAPADMIN_HTTPS=false: ports: - - "8080:80" + - "8080:80" healthcheck: test: echo 1 interval: 10s @@ -37,4 +37,3 @@ services: start_period: 300s 
security_opt: - label:disable - diff --git a/tests/testflows/ldap/external_user_directory/tests/common.py b/tests/testflows/ldap/external_user_directory/tests/common.py index e5980640721..23a8d68be0d 100644 --- a/tests/testflows/ldap/external_user_directory/tests/common.py +++ b/tests/testflows/ldap/external_user_directory/tests/common.py @@ -133,7 +133,7 @@ def create_entries_ldap_external_user_directory_config_content(entries, config_d return Config(content, path, name, uid, "config.xml") -def invalid_ldap_external_user_directory_config(server, roles, message, tail=20, timeout=60, config=None): +def invalid_ldap_external_user_directory_config(server, roles, message, tail=30, timeout=60, config=None): """Check that ClickHouse errors when trying to load invalid LDAP external user directory configuration file. """ diff --git a/tests/testflows/ldap/external_user_directory/tests/server_config.py b/tests/testflows/ldap/external_user_directory/tests/server_config.py index 4e2e586f77c..8d0d1db976a 100644 --- a/tests/testflows/ldap/external_user_directory/tests/server_config.py +++ b/tests/testflows/ldap/external_user_directory/tests/server_config.py @@ -41,7 +41,7 @@ def invalid_host(self): RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Invalid("1.0"), RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Host("1.0") ) -def empty_host(self, tail=20, timeout=60): +def empty_host(self, tail=30, timeout=60): """Check that server returns an error when LDAP server host value is empty. """ @@ -50,14 +50,14 @@ def empty_host(self, tail=20, timeout=60): servers = {"foo": {"host": "", "port": "389", "enable_tls": "no"}} - invalid_server_config(servers, message=message, tail=16, timeout=timeout) + invalid_server_config(servers, message=message, tail=30, timeout=timeout) @TestScenario @Requirements( RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Invalid("1.0"), RQ_SRS_009_LDAP_ExternalUserDirectory_Configuration_Server_Host("1.0") ) -def missing_host(self, tail=20, timeout=60): +def missing_host(self, tail=30, timeout=60): """Check that server returns an error when LDAP server host is missing. 
""" @@ -148,7 +148,7 @@ def invalid_enable_tls_value(self, timeout=60): servers = {"openldap1": {"host": "openldap1", "port": "389", "enable_tls": "foo", "auth_dn_prefix": "cn=", "auth_dn_suffix": ",ou=users,dc=company,dc=com" }} - invalid_server_config(servers, message=message, tail=18, timeout=timeout) + invalid_server_config(servers, message=message, tail=30, timeout=timeout) @TestScenario @Requirements( @@ -259,7 +259,7 @@ def invalid_verification_cooldown_value(self, invalid_value, timeout=20): }} with When("I try to use this configuration then it should not work"): - invalid_server_config(servers, message=error_message, tail=17, timeout=timeout) + invalid_server_config(servers, message=error_message, tail=30, timeout=timeout) @TestScenario @Requirements( diff --git a/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml b/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml index c8ff683df58..36e25ef766e 100644 --- a/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml +++ b/tests/testflows/ldap/role_mapping/docker-compose/docker-compose.yml @@ -135,7 +135,7 @@ services: zookeeper: condition: service_healthy - # dummy service which does nothing, but allows to postpone + # dummy service which does nothing, but allows to postpone # 'docker-compose up -d' till all dependecies will go healthy all_services_ready: image: hello-world diff --git a/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml b/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml index 139907c513c..e489637b8c9 100644 --- a/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml +++ b/tests/testflows/ldap/role_mapping/docker-compose/openldap-service.yml @@ -28,7 +28,7 @@ services: environment: PHPLDAPADMIN_HTTPS=false: ports: - - "8080:80" + - "8080:80" healthcheck: test: echo 1 interval: 10s @@ -37,4 +37,3 @@ services: start_period: 300s security_opt: - label:disable - diff --git a/tests/testflows/ldap/role_mapping/regression.py b/tests/testflows/ldap/role_mapping/regression.py index fff1e72a945..7afb6c98713 100755 --- a/tests/testflows/ldap/role_mapping/regression.py +++ b/tests/testflows/ldap/role_mapping/regression.py @@ -18,7 +18,7 @@ xfails = { @Name("role mapping") @ArgumentParser(argparser) @Specifications( - QA_SRS014_ClickHouse_LDAP_Role_Mapping + SRS_014_ClickHouse_LDAP_Role_Mapping ) @Requirements( RQ_SRS_014_LDAP_RoleMapping("1.0") diff --git a/tests/testflows/ldap/role_mapping/requirements/requirements.md b/tests/testflows/ldap/role_mapping/requirements/requirements.md new file mode 100644 index 00000000000..e79baa9cd7c --- /dev/null +++ b/tests/testflows/ldap/role_mapping/requirements/requirements.md @@ -0,0 +1,504 @@ +# SRS-014 ClickHouse LDAP Role Mapping +# Software Requirements Specification + +## Table of Contents + +* 1 [Revision History](#revision-history) +* 2 [Introduction](#introduction) +* 3 [Terminology](#terminology) + * 3.1 [LDAP](#ldap) +* 4 [Requirements](#requirements) + * 4.1 [General](#general) + * 4.1.1 [RQ.SRS-014.LDAP.RoleMapping](#rqsrs-014ldaprolemapping) + * 4.1.2 [RQ.SRS-014.LDAP.RoleMapping.WithFixedRoles](#rqsrs-014ldaprolemappingwithfixedroles) + * 4.1.3 [RQ.SRS-014.LDAP.RoleMapping.Search](#rqsrs-014ldaprolemappingsearch) + * 4.2 [Mapped Role Names](#mapped-role-names) + * 4.2.1 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithUTF8Characters](#rqsrs-014ldaprolemappingmaprolenamewithutf8characters) + * 4.2.2 
[RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.Long](#rqsrs-014ldaprolemappingmaprolenamelong) + * 4.2.3 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialXMLCharacters](#rqsrs-014ldaprolemappingmaprolenamewithspecialxmlcharacters) + * 4.2.4 [RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialRegexCharacters](#rqsrs-014ldaprolemappingmaprolenamewithspecialregexcharacters) + * 4.3 [Multiple Roles](#multiple-roles) + * 4.3.1 [RQ.SRS-014.LDAP.RoleMapping.Map.MultipleRoles](#rqsrs-014ldaprolemappingmapmultipleroles) + * 4.4 [LDAP Groups](#ldap-groups) + * 4.4.1 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.Removed](#rqsrs-014ldaprolemappingldapgroupremoved) + * 4.4.2 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.RemovedAndAdded.Parallel](#rqsrs-014ldaprolemappingldapgroupremovedandaddedparallel) + * 4.4.3 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemoved](#rqsrs-014ldaprolemappingldapgroupuserremoved) + * 4.4.4 [RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemovedAndAdded.Parallel](#rqsrs-014ldaprolemappingldapgroupuserremovedandaddedparallel) + * 4.5 [RBAC Roles](#rbac-roles) + * 4.5.1 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NotPresent](#rqsrs-014ldaprolemappingrbacrolenotpresent) + * 4.5.2 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Added](#rqsrs-014ldaprolemappingrbacroleadded) + * 4.5.3 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Removed](#rqsrs-014ldaprolemappingrbacroleremoved) + * 4.5.4 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Readded](#rqsrs-014ldaprolemappingrbacrolereadded) + * 4.5.5 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedAndAdded.Parallel](#rqsrs-014ldaprolemappingrbacroleremovedandaddedparallel) + * 4.5.6 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.New](#rqsrs-014ldaprolemappingrbacrolenew) + * 4.5.7 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NewPrivilege](#rqsrs-014ldaprolemappingrbacrolenewprivilege) + * 4.5.8 [RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedPrivilege](#rqsrs-014ldaprolemappingrbacroleremovedprivilege) + * 4.6 [Authentication](#authentication) + * 4.6.1 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel](#rqsrs-014ldaprolemappingauthenticationparallel) + * 4.6.2 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.ValidAndInvalid](#rqsrs-014ldaprolemappingauthenticationparallelvalidandinvalid) + * 4.6.3 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.MultipleServers](#rqsrs-014ldaprolemappingauthenticationparallelmultipleservers) + * 4.6.4 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalOnly](#rqsrs-014ldaprolemappingauthenticationparallellocalonly) + * 4.6.5 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalAndMultipleLDAP](#rqsrs-014ldaprolemappingauthenticationparallellocalandmultipleldap) + * 4.6.6 [RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.SameUser](#rqsrs-014ldaprolemappingauthenticationparallelsameuser) + * 4.7 [Server Configuration](#server-configuration) + * 4.7.1 [BindDN Parameter](#binddn-parameter) + * 4.7.1.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN](#rqsrs-014ldaprolemappingconfigurationserverbinddn) + * 4.7.1.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN.ConflictWith.AuthDN](#rqsrs-014ldaprolemappingconfigurationserverbinddnconflictwithauthdn) + * 4.8 [External User Directory Configuration](#external-user-directory-configuration) + * 4.8.1 [Syntax](#syntax) + * 4.8.1.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingsyntax) + * 4.8.2 [Special Characters Escaping](#special-characters-escaping) + * 4.8.2.1 
[RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SpecialCharactersEscaping](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingspecialcharactersescaping) + * 4.8.3 [Multiple Sections](#multiple-sections) + * 4.8.3.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingmultiplesections) + * 4.8.3.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections.IdenticalParameters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingmultiplesectionsidenticalparameters) + * 4.8.4 [BaseDN Parameter](#basedn-parameter) + * 4.8.4.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.BaseDN](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingbasedn) + * 4.8.5 [Attribute Parameter](#attribute-parameter) + * 4.8.5.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Attribute](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingattribute) + * 4.8.6 [Scope Parameter](#scope-parameter) + * 4.8.6.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscope) + * 4.8.6.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Base](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluebase) + * 4.8.6.3 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.OneLevel](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevalueonelevel) + * 4.8.6.4 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Children](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluechildren) + * 4.8.6.5 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Subtree](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluesubtree) + * 4.8.6.6 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Default](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingscopevaluedefault) + * 4.8.7 [Search Filter Parameter](#search-filter-parameter) + * 4.8.7.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SearchFilter](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingsearchfilter) + * 4.8.8 [Prefix Parameter](#prefix-parameter) + * 4.8.8.1 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefix) + * 4.8.8.2 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.Default](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixdefault) + * 4.8.8.3 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithUTF8Characters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixwithutf8characters) + * 4.8.8.4 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialXMLCharacters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixwithspecialxmlcharacters) + * 4.8.8.5 [RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialRegexCharacters](#rqsrs-014ldaprolemappingconfigurationuserdirectoryrolemappingprefixwithspecialregexcharacters) +* 5 [References](#references) + +## Revision History + +This document is stored in an electronic form using [Git] source control management software 
+hosted in a [GitHub Repository]. +All the updates are tracked using the [Revision History]. + +## Introduction + +The [SRS-007 ClickHouse Authentication of Users via LDAP] added support for authenticating +users using an [LDAP] server and the [SRS-009 ClickHouse LDAP External User Directory] added +support for authenticating users using an [LDAP] external user directory. + +This requirements specification adds additional functionality for mapping [LDAP] groups to +the corresponding [ClickHouse] [RBAC] roles when [LDAP] external user directory is configured. +This functionality will enable easier access management for [LDAP] authenticated users +as the privileges granted by the roles can be granted or revoked by granting or revoking +a corresponding [LDAP] group to one or more [LDAP] users. + +For the use case when only [LDAP] user authentication is used, the roles can be +managed using [RBAC] in the same way as for non-[LDAP] authenticated users. + +## Terminology + +### LDAP + +* Lightweight Directory Access Protocol + +## Requirements + +### General + +#### RQ.SRS-014.LDAP.RoleMapping +version: 1.0 + +[ClickHouse] SHALL support mapping of [LDAP] groups to [RBAC] roles +for users authenticated using [LDAP] external user directory. + +#### RQ.SRS-014.LDAP.RoleMapping.WithFixedRoles +version: 1.0 + +[ClickHouse] SHALL support mapping of [LDAP] groups to [RBAC] roles +for users authenticated using [LDAP] external user directory when +one or more roles are specified in the `<roles>` section. + +#### RQ.SRS-014.LDAP.RoleMapping.Search +version: 1.0 + +[ClickHouse] SHALL perform search on the [LDAP] server and map the results to [RBAC] role names +when authenticating users using the [LDAP] external user directory if the `<role_mapping>` section is configured +as part of the [LDAP] external user directory. The matched roles SHALL be assigned to the user. + +### Mapped Role Names + +#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithUTF8Characters +version: 1.0 + +[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory +to an [RBAC] role that contains UTF-8 characters. + +#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.Long +version: 1.0 + +[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory +to an [RBAC] role that has a name with more than 128 characters. + +#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialXMLCharacters +version: 1.0 + +[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory +to an [RBAC] role that has a name that contains special characters that need to be escaped in XML. + +#### RQ.SRS-014.LDAP.RoleMapping.Map.Role.Name.WithSpecialRegexCharacters +version: 1.0 + +[ClickHouse] SHALL support mapping [LDAP] search results for users authenticated using [LDAP] external user directory +to an [RBAC] role that has a name that contains special characters that need to be escaped in regex. + +### Multiple Roles + +#### RQ.SRS-014.LDAP.RoleMapping.Map.MultipleRoles +version: 1.0 + +[ClickHouse] SHALL support mapping one or more [LDAP] search results for users authenticated using +[LDAP] external user directory to one or more [RBAC] roles. + +### LDAP Groups + +#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.Removed +version: 1.0 + +[ClickHouse] SHALL not assign [RBAC] role(s) for any users authenticated using [LDAP] external user directory +if the corresponding [LDAP] group(s) that map those role(s) are removed.
Any users that have active sessions SHALL still +have privileges provided by the role(s) until the next time they are authenticated. + +#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.RemovedAndAdded.Parallel +version: 1.0 + +[ClickHouse] SHALL support authenticating users using [LDAP] external user directory +when [LDAP] groups are removed and added +at the same time as [LDAP] user authentications are performed in parallel. + +#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemoved +version: 1.0 + +[ClickHouse] SHALL not assign [RBAC] role(s) for the user authenticated using [LDAP] external user directory +if the user has been removed from the corresponding [LDAP] group(s) that map those role(s). +Any active user sessions SHALL have privileges provided by the role(s) until the next time the user is authenticated. + +#### RQ.SRS-014.LDAP.RoleMapping.LDAP.Group.UserRemovedAndAdded.Parallel +version: 1.0 + +[ClickHouse] SHALL support authenticating users using [LDAP] external user directory +when [LDAP] users are added and removed from [LDAP] groups used to map to [RBAC] roles +at the same time as [LDAP] user authentications are performed in parallel. + +### RBAC Roles + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NotPresent +version: 1.0 + +[ClickHouse] SHALL not reject authentication attempt using [LDAP] external user directory if any of the roles that are +are mapped from [LDAP] but are not present locally. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Added +version: 1.0 + +[ClickHouse] SHALL add the privileges provided by the [LDAP] mapped role when the +role is not present during user authentication using [LDAP] external user directory +as soon as the role is added. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Removed +version: 1.0 + +[ClickHouse] SHALL remove the privileges provided by the role from all the +users authenticated using [LDAP] external user directory if the [RBAC] role that was mapped +as a result of [LDAP] search is removed. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.Readded +version: 1.0 + +[ClickHouse] SHALL reassign the [RBAC] role and add all the privileges provided by the role +when it is re-added after removal for all [LDAP] users authenticated using external user directory +for any role that was mapped as a result of [LDAP] search. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedAndAdded.Parallel +version: 1.0 + +[ClickHouse] SHALL support authenticating users using [LDAP] external user directory +when [RBAC] roles that are mapped by [LDAP] groups +are added and removed at the same time as [LDAP] user authentications are performed in parallel. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.New +version: 1.0 + +[ClickHouse] SHALL not allow any new roles to be assigned to any +users authenticated using [LDAP] external user directory unless the role is specified +in the configuration of the external user directory or was mapped as a result of [LDAP] search. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.NewPrivilege +version: 1.0 + +[ClickHouse] SHALL add new privilege to all the users authenticated using [LDAP] external user directory +when new privilege is added to one of the roles that were mapped as a result of [LDAP] search. + +#### RQ.SRS-014.LDAP.RoleMapping.RBAC.Role.RemovedPrivilege +version: 1.0 + +[ClickHouse] SHALL remove privilege from all the users authenticated using [LDAP] external user directory +when the privilege that was provided by the mapped role is removed from all the roles +that were mapped as a result of [LDAP] search. 
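Taken together, the RBAC Roles requirements above amount to a simple resolution rule at authentication time: grant the roles listed in the `<roles>` section plus those mapped role names that currently exist locally, and skip mapped names that have no matching local role without failing the authentication. A minimal illustrative sketch of that rule follows; the function and argument names are invented for this example and are not ClickHouse internals.

```python
# Hypothetical illustration of the role resolution behaviour described above;
# not ClickHouse code. Mapped role names are granted only if a matching RBAC
# role exists locally, a missing role never rejects the authentication itself,
# and roles from the <roles> section are always granted.
def resolve_effective_roles(mapped_role_names, local_rbac_roles, fixed_roles=()):
    effective = set(fixed_roles)
    for name in mapped_role_names:
        if name in local_rbac_roles:
            effective.add(name)
        # else: skipped for now, picked up on a later authentication once the role is created
    return effective


if __name__ == "__main__":
    print(sorted(resolve_effective_roles(
        mapped_role_names={"clickhouse_admin", "clickhouse_missing"},
        local_rbac_roles={"clickhouse_admin", "readonly"},
        fixed_roles={"readonly"},
    )))  # ['clickhouse_admin', 'readonly']
```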
+ +### Authentication + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of users using [LDAP] server +when using [LDAP] external user directory that has role mapping enabled. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.ValidAndInvalid +version: 1.0 + +[ClickHouse] SHALL support authentication of valid users and +prohibit authentication of invalid users using [LDAP] server +in parallel without having invalid attempts affecting valid authentications +when using [LDAP] external user directory that has role mapping enabled. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.MultipleServers +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of external [LDAP] users +authenticated using multiple [LDAP] external user directories that have +role mapping enabled. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalOnly +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of users defined only locally +when one or more [LDAP] external user directories with role mapping +are specified in the configuration file. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.LocalAndMultipleLDAP +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of local and external [LDAP] users +authenticated using multiple [LDAP] external user directories with role mapping enabled. + +#### RQ.SRS-014.LDAP.RoleMapping.Authentication.Parallel.SameUser +version: 1.0 + +[ClickHouse] SHALL support parallel authentication of the same external [LDAP] user +authenticated using the same [LDAP] external user directory with role mapping enabled. + +### Server Configuration + +#### BindDN Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN +version: 1.0 + +[ClickHouse] SHALL support the `<bind_dn>` parameter in the `<ldap_servers>` section +of the `config.xml` that SHALL be used to construct the `DN` to bind to. +The resulting `DN` SHALL be constructed by replacing all `{user_name}` substrings of the template +with the actual user name during each authentication attempt. + +For example, + +```xml +<yandex> +    <ldap_servers> +        <my_ldap_server> +            <!-- ... --> +            <bind_dn>uid={user_name},ou=users,dc=example,dc=com</bind_dn> +            <!-- ... --> +        </my_ldap_server> +    </ldap_servers> +</yandex> +``` + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.Server.BindDN.ConflictWith.AuthDN +version: 1.0 + +[ClickHouse] SHALL return an error if both `<bind_dn>` and `<auth_dn_prefix>` or `<auth_dn_suffix>` parameters +are specified as part of [LDAP] server description in the `<ldap_servers>` section of the `config.xml`. + +### External User Directory Configuration + +#### Syntax + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Syntax +version: 1.0 + +[ClickHouse] SHALL support the `role_mapping` sub-section in the `<ldap>` section +of the `config.xml`.
+ +For example, + +```xml +<yandex> +    <user_directories> +        <ldap> +            <!-- ... --> +            <role_mapping> +                <base_dn>ou=groups,dc=example,dc=com</base_dn> +                <attribute>cn</attribute> +                <scope>subtree</scope> +                <search_filter>(&(objectClass=groupOfNames)(member={bind_dn}))</search_filter> +                <prefix>clickhouse_</prefix> +            </role_mapping> +        </ldap> +    </user_directories> +</yandex> +``` + +#### Special Characters Escaping + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SpecialCharactersEscaping +version: 1.0 + +[ClickHouse] SHALL support properly escaped special XML characters that can be present +as part of the values for different configuration parameters inside the +`<role_mapping>` section of the `config.xml` such as + +* `<search_filter>` parameter +* `<prefix>` parameter + +#### Multiple Sections + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections +version: 1.0 + +[ClickHouse] SHALL support multiple `<role_mapping>` sections defined inside the same `<ldap>` section +of the `config.xml` and all of the `<role_mapping>` sections SHALL be applied. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.MultipleSections.IdenticalParameters +version: 1.0 + +[ClickHouse] SHALL not duplicate mapped roles when multiple `<role_mapping>` sections +with identical parameters are defined inside the `<ldap>` section +of the `config.xml`. + +#### BaseDN Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.BaseDN +version: 1.0 + +[ClickHouse] SHALL support the `<base_dn>` parameter in the `<role_mapping>` section +of the `config.xml` that SHALL specify the template to be used to construct the base `DN` for the [LDAP] search. + +The resulting `DN` SHALL be constructed by replacing all the `{user_name}` and `{bind_dn}` substrings of +the template with the actual user name and bind `DN` during each [LDAP] search. + +#### Attribute Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Attribute +version: 1.0 + +[ClickHouse] SHALL support the `<attribute>` parameter in the `<role_mapping>` section of +the `config.xml` that SHALL specify the name of the attribute whose values SHALL be returned by the [LDAP] search. + +#### Scope Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope +version: 1.0 + +[ClickHouse] SHALL support the `<scope>` parameter in the `<role_mapping>` section of +the `config.xml` that SHALL define the scope of the LDAP search as defined +by the https://ldapwiki.com/wiki/LDAP%20Search%20Scopes. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Base +version: 1.0 + +[ClickHouse] SHALL support the `base` value for the `<scope>` parameter in the +`<role_mapping>` section of the `config.xml` that SHALL +limit the scope as specified by the https://ldapwiki.com/wiki/BaseObject. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.OneLevel +version: 1.0 + +[ClickHouse] SHALL support the `one_level` value for the `<scope>` parameter in the +`<role_mapping>` section of the `config.xml` that SHALL +limit the scope as specified by the https://ldapwiki.com/wiki/SingleLevel. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Children +version: 1.0 + +[ClickHouse] SHALL support the `children` value for the `<scope>` parameter in the +`<role_mapping>` section of the `config.xml` that SHALL +limit the scope as specified by the https://ldapwiki.com/wiki/SubordinateSubtree. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Subtree +version: 1.0 + +[ClickHouse] SHALL support the `subtree` value for the `<scope>` parameter in the +`<role_mapping>` section of the `config.xml` that SHALL +limit the scope as specified by the https://ldapwiki.com/wiki/WholeSubtree.
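The `<base_dn>` template handling described above (and the `<search_filter>` template covered further below) is plain substring substitution of the `{user_name}` and `{bind_dn}` placeholders. A small illustrative sketch under that assumption; the helper name is invented here and does not correspond to ClickHouse code.

```python
# Illustrative sketch of the {user_name}/{bind_dn}/{base_dn} template substitution
# described for <base_dn> above and <search_filter> below; not ClickHouse code.
def substitute_template(template: str, user_name: str, bind_dn: str = "", base_dn: str = "") -> str:
    return (template
            .replace("{user_name}", user_name)
            .replace("{bind_dn}", bind_dn)
            .replace("{base_dn}", base_dn))


if __name__ == "__main__":
    bind_dn = substitute_template("uid={user_name},ou=users,dc=example,dc=com", user_name="user1")
    print(bind_dn)  # uid=user1,ou=users,dc=example,dc=com
    print(substitute_template("(&(objectClass=groupOfNames)(member={bind_dn}))",
                              user_name="user1", bind_dn=bind_dn))
    # (&(objectClass=groupOfNames)(member=uid=user1,ou=users,dc=example,dc=com))
```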
+ +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Scope.Value.Default +version: 1.0 + +[ClickHouse] SHALL support the `subtree` as the default value for the `<scope>` parameter in the +`<role_mapping>` section of the `config.xml` when the `<scope>` parameter is not specified. + +#### Search Filter Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.SearchFilter +version: 1.0 + +[ClickHouse] SHALL support the `<search_filter>` parameter in the `<role_mapping>` +section of the `config.xml` that SHALL specify the template used to construct +the [LDAP filter](https://ldap.com/ldap-filters/) for the search. + +The resulting filter SHALL be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}` substrings +of the template with the actual user name, bind `DN`, and base `DN` during each [LDAP] search. + +#### Prefix Parameter + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix +version: 1.0 + +[ClickHouse] SHALL support the `<prefix>` parameter in the `<role_mapping>` +section of the `config.xml` that SHALL be expected to be in front of each string in +the original list of strings returned by the [LDAP] search. +Prefix SHALL be removed from the original strings and resulting strings SHALL be treated as [RBAC] role names. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.Default +version: 1.0 + +[ClickHouse] SHALL support empty string as the default value of the `<prefix>` parameter in +the `<role_mapping>` section of the `config.xml`. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithUTF8Characters +version: 1.0 + +[ClickHouse] SHALL support UTF8 characters as the value of the `<prefix>` parameter in +the `<role_mapping>` section of the `config.xml`. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialXMLCharacters +version: 1.0 + +[ClickHouse] SHALL support XML special characters as the value of the `<prefix>` parameter in +the `<role_mapping>` section of the `config.xml`. + +##### RQ.SRS-014.LDAP.RoleMapping.Configuration.UserDirectory.RoleMapping.Prefix.WithSpecialRegexCharacters +version: 1.0 + +[ClickHouse] SHALL support regex special characters as the value of the `<prefix>` parameter in +the `<role_mapping>` section of the `config.xml`.
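The `<prefix>` requirements above describe a strip step over the attribute values returned by the [LDAP] search. A minimal illustrative sketch follows; it assumes values that do not start with the configured prefix are simply ignored (the SRS only states that the prefix is expected in front of each value), and with the default empty prefix every returned value is kept as-is. The helper name is invented for this example.

```python
# Illustrative model of the <prefix> handling described above; not ClickHouse code.
# Assumption: values that do not start with the configured prefix are skipped.
def roles_from_search_results(values, prefix=""):
    roles = []
    for value in values:
        if value.startswith(prefix):
            # Strip the prefix; the remainder is treated as an RBAC role name.
            roles.append(value[len(prefix):])
    return roles


if __name__ == "__main__":
    groups = ["clickhouse_admin", "clickhouse_readonly", "unrelated_group"]
    print(roles_from_search_results(groups, prefix="clickhouse_"))  # ['admin', 'readonly']
    print(roles_from_search_results(groups))  # default empty prefix keeps all values unchanged
```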
+ +## References + +* **Access Control and Account Management**: https://clickhouse.tech/docs/en/operations/access-rights/ +* **LDAP**: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol +* **ClickHouse:** https://clickhouse.tech +* **GitHub Repository**: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/role_mapping/requirements/requirements.md +* **Revision History**: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/role_mapping/requirements/requirements.md +* **Git:** https://git-scm.com/ + +[RBAC]: https://clickhouse.tech/docs/en/operations/access-rights/ +[SRS]: #srs +[Access Control and Account Management]: https://clickhouse.tech/docs/en/operations/access-rights/ +[SRS-009 ClickHouse LDAP External User Directory]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/external_user_directory/requirements/requirements.md +[SRS-007 ClickHouse Authentication of Users via LDAP]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/authentication/requirements/requirements.md +[LDAP]: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol +[ClickHouse]: https://clickhouse.tech +[GitHub Repository]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/role_mapping/requirements/requirements.md +[Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/role_mapping/requirements/requirements.md +[Git]: https://git-scm.com/ +[GitHub]: https://github.com diff --git a/tests/testflows/ldap/role_mapping/requirements/requirements.py b/tests/testflows/ldap/role_mapping/requirements/requirements.py index ca7192e9dad..b2748762e03 100644 --- a/tests/testflows/ldap/role_mapping/requirements/requirements.py +++ b/tests/testflows/ldap/role_mapping/requirements/requirements.py @@ -1,6 +1,6 @@ # These requirements were auto generated # from software requirements specification (SRS) -# document by TestFlows v1.6.210101.1235930. +# document by TestFlows v1.6.210129.1222545. # Do not edit by hand but re-generate instead # using 'tfs requirements generate' command. from testflows.core import Specification @@ -814,15 +814,15 @@ RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithS level=4, num='4.8.8.5') -QA_SRS014_ClickHouse_LDAP_Role_Mapping = Specification( - name='QA-SRS014 ClickHouse LDAP Role Mapping', +SRS_014_ClickHouse_LDAP_Role_Mapping = Specification( + name='SRS-014 ClickHouse LDAP Role Mapping', description=None, - author='vzakaznikov', - date='December 4, 2020', - status='-', - approved_by='-', - approved_date='-', - approved_version='-', + author=None, + date=None, + status=None, + approved_by=None, + approved_date=None, + approved_version=None, version=None, group=None, type=None, @@ -950,27 +950,9 @@ QA_SRS014_ClickHouse_LDAP_Role_Mapping = Specification( RQ_SRS_014_LDAP_RoleMapping_Configuration_UserDirectory_RoleMapping_Prefix_WithSpecialRegexCharacters, ), content=''' -# QA-SRS014 ClickHouse LDAP Role Mapping +# SRS-014 ClickHouse LDAP Role Mapping # Software Requirements Specification -(c) 2020 Altinity LTD. All Rights Reserved. 
- -**Document status:** Confidential - -**Author:** vzakaznikov - -**Date:** December 4, 2020 - -## Approval - -**Status:** - - -**Version:** - - -**Approved by:** - - -**Date:** - - ## Table of Contents * 1 [Revision History](#revision-history) @@ -1046,13 +1028,13 @@ QA_SRS014_ClickHouse_LDAP_Role_Mapping = Specification( ## Revision History This document is stored in an electronic form using [Git] source control management software -hosted in a [GitLab Repository]. +hosted in a [GitHub Repository]. All the updates are tracked using the [Revision History]. ## Introduction -The [QA-SRS007 ClickHouse Authentication of Users via LDAP] added support for authenticating -users using an [LDAP] server and the [QA-SRS009 ClickHouse LDAP External User Directory] added +The [SRS-007 ClickHouse Authentication of Users via LDAP] added support for authenticating +users using an [LDAP] server and the [SRS-009 ClickHouse LDAP External User Directory] added support for authenticating users using an [LDAP] external user directory. This requirements specification adds additional functionality for mapping [LDAP] groups to @@ -1457,19 +1439,19 @@ the `` section of the `config.xml`. * **Access Control and Account Management**: https://clickhouse.tech/docs/en/operations/access-rights/ * **LDAP**: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol * **ClickHouse:** https://clickhouse.tech -* **GitLab Repository**: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/blob/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md -* **Revision History**: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/commits/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md +* **GitHub Repository**: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/role_mapping/requirements/requirements.md +* **Revision History**: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/role_mapping/requirements/requirements.md * **Git:** https://git-scm.com/ [RBAC]: https://clickhouse.tech/docs/en/operations/access-rights/ [SRS]: #srs [Access Control and Account Management]: https://clickhouse.tech/docs/en/operations/access-rights/ -[QA-SRS009 ClickHouse LDAP External User Directory]: https://gitlab.com/altinity-qa/documents/qa-srs009-clickhouse-ldap-external-user-directory/-/blob/master/QA_SRS009_ClickHouse_LDAP_External_User_Directory.md -[QA-SRS007 ClickHouse Authentication of Users via LDAP]: https://gitlab.com/altinity-qa/documents/qa-srs007-clickhouse-athentication-of-users-via-ldap/-/blob/master/QA_SRS007_ClickHouse_Authentication_of_Users_via_LDAP.md +[SRS-009 ClickHouse LDAP External User Directory]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/external_user_directory/requirements/requirements.md +[SRS-007 ClickHouse Authentication of Users via LDAP]: https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/authentication/requirements/requirements.md [LDAP]: https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol [ClickHouse]: https://clickhouse.tech -[GitLab Repository]: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/blob/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md -[Revision History]: https://gitlab.com/altinity-qa/documents/qa-srs014-clickhouse-ldap-role-mapping/-/commits/master/QA_SRS014_ClickHouse_LDAP_Role_Mapping.md +[GitHub Repository]: 
https://github.com/ClickHouse/ClickHouse/blob/master/tests/testflows/ldap/role_mapping/requirements/requirements.md +[Revision History]: https://github.com/ClickHouse/ClickHouse/commits/master/tests/testflows/ldap/role_mapping/requirements/requirements.md [Git]: https://git-scm.com/ -[GitLab]: https://gitlab.com +[GitHub]: https://github.com ''') diff --git a/tests/testflows/ldap/role_mapping/tests/server_config.py b/tests/testflows/ldap/role_mapping/tests/server_config.py index 85fe33f4388..8008d9003d7 100644 --- a/tests/testflows/ldap/role_mapping/tests/server_config.py +++ b/tests/testflows/ldap/role_mapping/tests/server_config.py @@ -65,7 +65,7 @@ def bind_dn_conflict_with_auth_dn(self, timeout=60): } } - invalid_server_config(servers, message=message, tail=18, timeout=timeout) + invalid_server_config(servers, message=message, tail=30, timeout=timeout) @TestFeature @@ -75,4 +75,4 @@ def feature(self, node="clickhouse1"): """ self.context.node = self.context.cluster.node(node) for scenario in loads(current_module(), Scenario): - scenario() \ No newline at end of file + scenario() diff --git a/tests/testflows/rbac/docker-compose/docker-compose.yml b/tests/testflows/rbac/docker-compose/docker-compose.yml index a3f5144c9ed..29f2ef52470 100755 --- a/tests/testflows/rbac/docker-compose/docker-compose.yml +++ b/tests/testflows/rbac/docker-compose/docker-compose.yml @@ -57,4 +57,4 @@ services: clickhouse3: condition: service_healthy zookeeper: - condition: service_healthy \ No newline at end of file + condition: service_healthy diff --git a/utils/check-mysql-binlog/main.cpp b/utils/check-mysql-binlog/main.cpp index ccdc4cd168c..04dfb56ff08 100644 --- a/utils/check-mysql-binlog/main.cpp +++ b/utils/check-mysql-binlog/main.cpp @@ -69,21 +69,27 @@ static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody( case DB::MySQLReplication::WRITE_ROWS_EVENT_V1: case DB::MySQLReplication::WRITE_ROWS_EVENT_V2: { - event = std::make_shared(last_table_map_event, std::move(header)); + DB::MySQLReplication::RowsEventHeader rows_header(header.type); + rows_header.parse(*event_payload); + event = std::make_shared(last_table_map_event, std::move(header), rows_header); event->parseEvent(*event_payload); break; } case DB::MySQLReplication::DELETE_ROWS_EVENT_V1: case DB::MySQLReplication::DELETE_ROWS_EVENT_V2: { - event = std::make_shared(last_table_map_event, std::move(header)); + DB::MySQLReplication::RowsEventHeader rows_header(header.type); + rows_header.parse(*event_payload); + event = std::make_shared(last_table_map_event, std::move(header), rows_header); event->parseEvent(*event_payload); break; } case DB::MySQLReplication::UPDATE_ROWS_EVENT_V1: case DB::MySQLReplication::UPDATE_ROWS_EVENT_V2: { - event = std::make_shared(last_table_map_event, std::move(header)); + DB::MySQLReplication::RowsEventHeader rows_header(header.type); + rows_header.parse(*event_payload); + event = std::make_shared(last_table_map_event, std::move(header), rows_header); event->parseEvent(*event_payload); break; } diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 9e2b5fc6fef..f8926a9af2f 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -70,7 +70,11 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.xml' | xargs xmllint --noout --nonet # FIXME: for now only clickhouse-test -pylint --score=n $ROOT_PATH/tests/clickhouse-test +pylint --rcfile=$ROOT_PATH/.pylintrc --score=n $ROOT_PATH/tests/clickhouse-test + +find $ROOT_PATH -not -path $ROOT_PATH'/contrib*' \( -name 
'*.yaml' -or -name '*.yml' \) -type f | + grep -vP $EXCLUDE_DIRS | + xargs yamllint --config-file=$ROOT_PATH/.yamllint # Machine translation to Russian is strictly prohibited find $ROOT_PATH/docs/ru -name '*.md' | @@ -107,7 +111,23 @@ find $ROOT_PATH -not -path $ROOT_PATH'/docker*' -not -path $ROOT_PATH'/contrib*' # Check that ya.make files are auto-generated "$ROOT_PATH"/utils/generate-ya-make/generate-ya-make.sh -git status -uno | grep ya.make && echo "ya.make files should be generated with utils/generate-ya-make/generate-ya-make.sh" +# FIXME: apparently sandbox (don't confuse it with docker) cloning sources +# using some ancient git version, <2.8, that contains one bug for submodules +# initialization [1]: +# +# " * A partial rewrite of "git submodule" in the 2.7 timeframe changed +# the way the gitdir: pointer in the submodules point at the real +# repository location to use absolute paths by accident. This has +# been corrected." +# +# [1]: https://github.com/git/git/blob/cf11a67975b057a144618badf16dc4e3d25b9407/Documentation/RelNotes/2.8.3.txt#L33-L36 +# +# Due to which "git status" will report the following error: +# +# fatal: not a git repository: /place/sandbox-data/tasks/0/2/882869720/ClickHouse/.git/modules/contrib/AMQP-CPP +# +# Anyway this check does not requires any submodule traverse, so it is fine to ignore those errors. +git status -uno 2> >(grep "fatal: not a git repository: /place/sandbox-data/tasks/.*/ClickHouse/\\.git/modules/contrib") | grep ya.make && echo "ya.make files should be generated with utils/generate-ya-make/generate-ya-make.sh" # Check that every header file has #pragma once in first line find $ROOT_PATH/{src,programs,utils} -name '*.h' | diff --git a/utils/check-style/check-style-all b/utils/check-style/check-style-all new file mode 100755 index 00000000000..c34224e5469 --- /dev/null +++ b/utils/check-style/check-style-all @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +dir=$(dirname $0) +$dir/check-style -n +$dir/check-typos +$dir/check-whitespaces -n +$dir/check-duplicate-includes.sh +$dir/shellcheck-run.sh diff --git a/utils/convert-month-partitioned-parts/main.cpp b/utils/convert-month-partitioned-parts/main.cpp index bce1e08077c..0a697937eb6 100644 --- a/utils/convert-month-partitioned-parts/main.cpp +++ b/utils/convert-month-partitioned-parts/main.cpp @@ -97,6 +97,8 @@ void run(String part_path, String date_column, String dest_path) Poco::File(new_tmp_part_path_str + "checksums.txt").setWriteable(); WriteBufferFromFile checksums_out(new_tmp_part_path_str + "checksums.txt", 4096); checksums.write(checksums_out); + checksums_in.close(); + checksums_out.close(); Poco::File(new_tmp_part_path).renameTo(new_part_path.toString()); } diff --git a/utils/github/backport.py b/utils/github/backport.py index 576e3b069c2..7fddbbee241 100644 --- a/utils/github/backport.py +++ b/utils/github/backport.py @@ -62,7 +62,7 @@ class Backport: RE_NO_BACKPORT = re.compile(r'^v(\d+\.\d+)-no-backport$') RE_BACKPORTED = re.compile(r'^v(\d+\.\d+)-backported$') - # pull-requests are sorted by ancestry from the least recent. + # pull-requests are sorted by ancestry from the most recent. for pr in pull_requests: while repo.comparator(branches[-1][1]) >= repo.comparator(pr['mergeCommit']['oid']): logging.info("PR #{} is already inside {}. 
Dropping this branch for further PRs".format(pr['number'], branches[-1][0])) diff --git a/utils/github/local.py b/utils/github/local.py index a997721bc76..2ad8d4b8b71 100644 --- a/utils/github/local.py +++ b/utils/github/local.py @@ -6,15 +6,15 @@ import os import re -class RepositoryBase(object): +class RepositoryBase: def __init__(self, repo_path): import git self._repo = git.Repo(repo_path, search_parent_directories=(not repo_path)) - # commit comparator + # comparator of commits def cmp(x, y): - if x == y: + if str(x) == str(y): return 0 if self._repo.is_ancestor(x, y): return -1 diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index fc1cf7c1b67..4ba92864020 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,8 +1,18 @@ +v21.2.4.6-stable 2021-02-20 +v21.2.3.15-stable 2021-02-14 +v21.2.2.8-stable 2021-02-07 +v21.1.5.4-stable 2021-02-20 +v21.1.4.46-stable 2021-02-14 +v21.1.3.32-stable 2021-02-03 v21.1.2.15-stable 2021-01-18 +v20.12.7.3-stable 2021-02-20 +v20.12.6.29-stable 2021-02-14 +v20.12.5.18-stable 2021-02-03 v20.12.5.14-stable 2020-12-28 v20.12.4.5-stable 2020-12-24 v20.12.3.3-stable 2020-12-09 v20.12.2.1-stable 2020-12-09 +v20.11.7.16-stable 2021-02-03 v20.11.6.6-stable 2020-12-24 v20.11.5.18-stable 2020-12-06 v20.11.4.13-stable 2020-11-20 @@ -20,6 +30,7 @@ v20.9.5.5-stable 2020-11-13 v20.9.4.76-stable 2020-10-29 v20.9.3.45-stable 2020-10-09 v20.9.2.20-stable 2020-09-22 +v20.8.13.15-lts 2021-02-20 v20.8.12.2-lts 2021-01-16 v20.8.11.17-lts 2020-12-25 v20.8.10.13-lts 2020-12-24 diff --git a/utils/zookeeper-test/main.cpp b/utils/zookeeper-test/main.cpp index 8f8aac00866..bfd7df26726 100644 --- a/utils/zookeeper-test/main.cpp +++ b/utils/zookeeper-test/main.cpp @@ -127,18 +127,22 @@ void testCreateListWatchEvent(zkutil::ZooKeeper & zk) void testMultiRequest(zkutil::ZooKeeper & zk) { + std::cerr << "Testing multi request\n"; Coordination::Requests requests; requests.push_back(zkutil::makeCreateRequest("/data/multirequest", "aaa", zkutil::CreateMode::Persistent)); requests.push_back(zkutil::makeSetRequest("/data/multirequest", "bbb", -1)); zk.multi(requests); + std::cerr << "Multi executed\n"; try { requests.clear(); + std::cerr << "Testing bad multi\n"; requests.push_back(zkutil::makeCreateRequest("/data/multirequest", "qweqwe", zkutil::CreateMode::Persistent)); requests.push_back(zkutil::makeSetRequest("/data/multirequest", "bbb", -1)); requests.push_back(zkutil::makeSetRequest("/data/multirequest", "ccc", -1)); zk.multi(requests); + std::cerr << "Bad multi executed\n"; std::terminate(); } catch (...) 
@@ -147,6 +151,7 @@ void testMultiRequest(zkutil::ZooKeeper & zk) } checkEq(zk, "/data/multirequest", "bbb"); + std::cerr << "Multi request finished\n"; } std::mutex elements_mutex; diff --git a/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu.json b/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu.json new file mode 100644 index 00000000000..1217adbbff5 --- /dev/null +++ b/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu.json @@ -0,0 +1,55 @@ +[ + { + "system": "Yandex Cloud 8vCPU", + "system_full": "Yandex Cloud Broadwell, 8 vCPU (4 threads), 64 GB RAM, 500 GB SSD", + "cpu_vendor": "Intel", + "time": "2021-02-05 00:00:00", + "kind": "cloud", + "result": + [ + [0.004, 0.003, 0.003], + [0.047, 0.030, 0.021], + [0.129, 0.066, 0.067], + [0.873, 0.098, 0.095], + [0.869, 0.247, 0.257], + [1.429, 0.818, 0.768], + [0.055, 0.042, 0.043], + [0.034, 0.025, 0.024], + [1.372, 1.003, 1.051], + [1.605, 1.281, 1.209], + [0.942, 0.503, 0.483], + [0.980, 0.537, 0.558], + [2.076, 1.664, 1.635], + [3.136, 2.235, 2.171], + [2.351, 1.973, 1.974], + [2.369, 2.170, 2.133], + [6.281, 5.576, 5.498], + [3.739, 3.481, 3.354], + [10.947, 10.225, 10.271], + [0.875, 0.111, 0.108], + [10.832, 1.844, 1.877], + [12.344, 2.330, 2.227], + [22.999, 5.000, 4.903], + [20.086, 2.390, 2.278], + [3.036, 0.722, 0.673], + [1.420, 0.602, 0.578], + [3.040, 0.728, 0.714], + [10.842, 1.874, 1.783], + [9.207, 2.809, 2.705], + [2.751, 2.703, 2.714], + [2.810, 1.675, 1.568], + [6.507, 2.449, 2.505], + [15.968, 15.014, 15.318], + [13.479, 7.951, 7.702], + [13.227, 7.791, 7.699], + [2.811, 2.723, 2.549], + [0.358, 0.249, 0.273], + [0.157, 0.099, 0.101], + [0.189, 0.088, 0.080], + [0.758, 0.544, 0.525], + [0.115, 0.033, 0.027], + [0.063, 0.048, 0.023], + [0.014, 0.011, 0.008] + ] + } +] diff --git a/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu_s3.json b/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu_s3.json new file mode 100644 index 00000000000..ace2442c86e --- /dev/null +++ b/website/benchmark/hardware/results/yandex_cloud_broadwell_8_vcpu_s3.json @@ -0,0 +1,55 @@ +[ + { + "system": "Yandex Cloud 8vCPU Object Storage", + "system_full": "Yandex Cloud Broadwell, 8 vCPU (4 threads), 64 GB RAM, Object Storage", + "cpu_vendor": "Intel", + "time": "2021-02-05 00:00:00", + "kind": "cloud", + "result": + [ + [0.007, 0.003, 0.003], + [0.214, 0.111, 0.096], + [1.239, 1.359, 0.718], + [3.056, 3.366, 1.869], + [1.946, 1.552, 2.450], + [4.804, 2.307, 2.398], + [0.198, 0.108, 0.114], + [0.141, 0.104, 0.100], + [2.755, 2.749, 3.608], + [3.140, 3.905, 3.830], + [2.353, 4.996, 1.637], + [3.796, 1.536, 1.724], + [3.565, 3.016, 3.381], + [4.962, 4.263, 4.352], + [4.210, 3.974, 4.318], + [3.884, 3.434, 3.124], + [10.451, 9.147, 7.526], + [6.288, 5.882, 7.714], + [15.239, 33.243, 17.968], + [1.645, 1.870, 3.230], + [10.980, 8.984, 7.589], + [14.345, 11.503, 12.449], + [17.687, 17.764, 18.984], + [76.606, 65.179, 94.215], + [5.833, 3.347, 3.127], + [3.815, 2.574, 2.402], + [4.916, 6.169, 5.731], + [7.961, 9.930, 8.555], + [5.995, 7.382, 6.054], + [3.113, 4.176, 3.172], + [5.077, 5.221, 5.709], + [8.990, 9.598, 6.272], + [17.832, 17.668, 17.276], + [11.846, 14.692, 13.225], + [12.544, 12.502, 12.725], + [3.604, 4.811, 3.267], + [0.738, 0.751, 0.862], + [0.718, 0.611, 0.561], + [2.125, 0.688, 0.522], + [1.469, 1.546, 1.373], + [1.382, 1.069, 0.976], + [1.353, 1.212, 1.119], + [0.045, 0.031, 0.041] + ] + } +] diff --git a/website/templates/footer.html 
b/website/templates/footer.html index 765ea63d528..1eaf519b58b 100644 --- a/website/templates/footer.html +++ b/website/templates/footer.html @@ -8,7 +8,7 @@ {{ _('ClickHouse source code is published under the Apache 2.0 License.') }} {{ _('Software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.') }}
- © 2016–2020 {{ _('Yandex LLC') }} + © 2016–2021 {{ _('Yandex LLC') }}
diff --git a/website/templates/index/community.html b/website/templates/index/community.html index e65f9ff0f86..20b09e7318b 100644 --- a/website/templates/index/community.html +++ b/website/templates/index/community.html @@ -66,7 +66,7 @@
-