diff --git a/.clang-format b/.clang-format index d8f273702c8..2da3911dced 100644 --- a/.clang-format +++ b/.clang-format @@ -21,7 +21,6 @@ ConstructorInitializerAllOnOneLineOrOnePerLine: true ExperimentalAutoDetectBinPacking: true UseTab: Never TabWidth: 4 -IndentWidth: 4 Standard: Cpp11 PointerAlignment: Middle MaxEmptyLinesToKeep: 2 diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 449abc9484d..4b201802cae 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -7,11 +7,11 @@ tests/ci/run_check.py ### Changelog category (leave one): - New Feature - Improvement -- Bug Fix (user-visible misbehavior in an official stable release) - Performance Improvement - Backward Incompatible Change - Build/Testing/Packaging Improvement - Documentation (changelog entry is not required) +- Bug Fix (user-visible misbehavior in an official stable release) - Not for changelog (changelog entry is not required) diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml index 0d81a7b303c..d69168b01ee 100644 --- a/.github/workflows/backport_branches.yml +++ b/.github/workflows/backport_branches.yml @@ -349,6 +349,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index ecd5b85d320..1182481c897 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -487,6 +487,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . 
&& echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index f6d6d192f48..85d865252ad 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -118,9 +118,11 @@ jobs: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" SonarCloud: + # TODO: Remove if: whenever SonarCloud supports c++23 + if: ${{ false }} runs-on: [self-hosted, builder] env: - SONAR_SCANNER_VERSION: 4.7.0.2747 + SONAR_SCANNER_VERSION: 4.8.0.2856 SONAR_SERVER_URL: "https://sonarcloud.io" BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed CC: clang-15 @@ -173,4 +175,4 @@ jobs: --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ --define sonar.projectKey="ClickHouse_ClickHouse" \ --define sonar.organization="clickhouse-java" \ - --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql" + --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql" \ diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index ab0cbbb7ec1..506ed451b6d 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -550,6 +550,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -1301,6 +1308,40 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + FunctionalStatelessTestReleaseAnalyzer: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/stateless_analyzer + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Stateless tests (release, analyzer) + REPO_COPY=${{runner.temp}}/stateless_analyzer/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Functional test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 functional_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" FunctionalStatelessTestReleaseS3_0: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -4748,6 +4789,7 @@ jobs: - FunctionalStatelessTestReleaseDatabaseReplicated2 - FunctionalStatelessTestReleaseDatabaseReplicated3 - FunctionalStatelessTestReleaseWideParts + - FunctionalStatelessTestReleaseAnalyzer - FunctionalStatelessTestAarch64 - FunctionalStatelessTestAsan0 - 
FunctionalStatelessTestAsan1 @@ -4839,3 +4881,41 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py python3 merge_pr.py --check-approved +############################################################################################## +########################### SQLLOGIC TEST ################################################### +############################################################################################## + SQLLogicTestRelease: + needs: [BuilderDebRelease] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/sqllogic_debug + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Sqllogic test (release) + REPO_COPY=${{runner.temp}}/sqllogic_debug/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download json reports + uses: actions/download-artifact@v2 + with: + path: ${{ env.REPORTS_PATH }} + - name: Clear repository + run: | + sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE" + - name: Check out repository code + uses: actions/checkout@v2 + - name: Sqllogic test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 sqllogic_test.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 1282dbef50b..21284815583 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -406,6 +406,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" diff --git a/.gitmodules b/.gitmodules index ca55281e643..e4d63a34118 100644 --- a/.gitmodules +++ b/.gitmodules @@ -335,3 +335,6 @@ [submodule "contrib/liburing"] path = contrib/liburing url = https://github.com/axboe/liburing +[submodule "contrib/isa-l"] + path = contrib/isa-l + url = https://github.com/ClickHouse/isa-l.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 47320208f02..e2505856d0c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v23.4, 2023-04-26](#234)**
**[ClickHouse release v23.3 LTS, 2023-03-30](#233)**
**[ClickHouse release v23.2, 2023-02-23](#232)**
**[ClickHouse release v23.1, 2023-01-25](#231)**
@@ -6,6 +7,153 @@ # 2023 Changelog +### ClickHouse release 23.4 LTS, 2023-04-26 + +#### Backward Incompatible Change +* Formatter '%M' in function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using setting "formatdatetime_parsedatetime_m_is_month_name = 0". [#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)). +* This change makes sense only if you are using the virtual filesystem cache. If `path` in the virtual filesystem cache configuration is not empty and is not an absolute path, then it will be put in `/caches/`. [#48784](https://github.com/ClickHouse/ClickHouse/pull/48784) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Primary/secondary indices and sorting keys with identical expressions are now rejected. This behavior can be disabled using setting `allow_suspicious_indices`. [#48536](https://github.com/ClickHouse/ClickHouse/pull/48536) ([凌涛](https://github.com/lingtaolf)). + +#### New Feature +* Support new aggregate function `quantileGK`/`quantilesGK`, like [approx_percentile](https://spark.apache.org/docs/latest/api/sql/index.html#approx_percentile) in spark. Greenwald-Khanna algorithm refer to http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf. [#46428](https://github.com/ClickHouse/ClickHouse/pull/46428) ([李扬](https://github.com/taiyang-li)). +* Add a statement `SHOW COLUMNS` which shows distilled information from system.columns. [#48017](https://github.com/ClickHouse/ClickHouse/pull/48017) ([Robert Schulze](https://github.com/rschu1ze)). +* Added `LIGHTWEIGHT` and `PULL` modifiers for `SYSTEM SYNC REPLICA` query. `LIGHTWEIGHT` version waits for fetches and drop-ranges only (merges and mutations are ignored). `PULL` version pulls new entries from ZooKeeper and does not wait for them. Fixes [#47794](https://github.com/ClickHouse/ClickHouse/issues/47794). [#48085](https://github.com/ClickHouse/ClickHouse/pull/48085) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add `kafkaMurmurHash` function for compatibility with Kafka DefaultPartitioner. Closes [#47834](https://github.com/ClickHouse/ClickHouse/issues/47834). [#48185](https://github.com/ClickHouse/ClickHouse/pull/48185) ([Nikolay Degterinsky](https://github.com/evillique)). +* Allow to easily create a user with the same grants as the current user by using `GRANT CURRENT GRANTS`. [#48262](https://github.com/ClickHouse/ClickHouse/pull/48262) ([pufit](https://github.com/pufit)). +* Add statistical aggregate function `kolmogorovSmirnovTest`. Close [#48228](https://github.com/ClickHouse/ClickHouse/issues/48228). [#48325](https://github.com/ClickHouse/ClickHouse/pull/48325) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). +* Added a `lost_part_count` column to the `system.replicas` table. The column value shows the total number of lost parts in the corresponding table. Value is stored in zookeeper and can be used instead of not persistent `ReplicatedDataLoss` profile event for monitoring. [#48526](https://github.com/ClickHouse/ClickHouse/pull/48526) ([Sergei Trifonov](https://github.com/serxa)). +* Add `soundex` function for compatibility. Closes [#39880](https://github.com/ClickHouse/ClickHouse/issues/39880). [#48567](https://github.com/ClickHouse/ClickHouse/pull/48567) ([FriendLey](https://github.com/FriendLey)). +* Support `Map` type for JSONExtract. 
[#48629](https://github.com/ClickHouse/ClickHouse/pull/48629) ([李扬](https://github.com/taiyang-li)).
+* Add `PrettyJSONEachRow` format to output pretty JSON with new line delimiters and 4-space indents. [#48898](https://github.com/ClickHouse/ClickHouse/pull/48898) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add `ParquetMetadata` input format to read Parquet file metadata. [#48911](https://github.com/ClickHouse/ClickHouse/pull/48911) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add `extractKeyValuePairs` function to extract key-value pairs from strings. Input strings might contain noise (i.e. log files / do not need to be 100% formatted in key-value-pair format); the algorithm will look for key-value pairs matching the arguments passed to the function. As of now, the function accepts the following arguments: `data_column` (mandatory), `key_value_pair_delimiter` (defaults to `:`), `pair_delimiters` (defaults to `\space \, \;`) and `quoting_character` (defaults to double quotes). [#43606](https://github.com/ClickHouse/ClickHouse/pull/43606) ([Arthur Passos](https://github.com/arthurpassos)).
+* Functions replaceOne(), replaceAll(), replaceRegexpOne() and replaceRegexpAll() can now be called with non-const pattern and replacement arguments. [#46589](https://github.com/ClickHouse/ClickHouse/pull/46589) ([Robert Schulze](https://github.com/rschu1ze)).
+* Added functions to work with columns of type `Map`: `mapConcat`, `mapSort`, `mapExists`. [#48071](https://github.com/ClickHouse/ClickHouse/pull/48071) ([Anton Popov](https://github.com/CurtizJ)).
+
+#### Performance Improvement
+* Reading files in `Parquet` format is now much faster. IO and decoding are parallelized (controlled by the `max_threads` setting), and only the required data ranges are read. [#47964](https://github.com/ClickHouse/ClickHouse/pull/47964) ([Michael Kolupaev](https://github.com/al13n321)).
+* If we run a mutation with IN (subquery) like this: `ALTER TABLE t UPDATE col='new value' WHERE id IN (SELECT id FROM huge_table)` and the table `t` has multiple parts, then for each part a set for the subquery `SELECT id FROM huge_table` is built in memory. And if there are many parts, this might consume a lot of memory (and lead to an OOM) and CPU. The solution is to introduce a short-lived cache of sets that are currently being built by mutation tasks. If another task of the same mutation is executed concurrently, it can look up the set in the cache, wait for it to be built and reuse it. [#46835](https://github.com/ClickHouse/ClickHouse/pull/46835) ([Alexander Gololobov](https://github.com/davenger)).
+* Only check dependencies if necessary when applying `ALTER TABLE` queries. [#48062](https://github.com/ClickHouse/ClickHouse/pull/48062) ([Raúl Marín](https://github.com/Algunenano)).
+* Optimize function `mapUpdate`. [#48118](https://github.com/ClickHouse/ClickHouse/pull/48118) ([Anton Popov](https://github.com/CurtizJ)).
+* Now an internal query to the local replica is sent explicitly, and data from it is received through the loopback interface. The setting `prefer_localhost_replica` is not respected for parallel replicas. This is needed for better scheduling and makes the code cleaner: the initiator is only responsible for coordinating the reading process and merging results, continuously answering requests while all the secondary queries read the data. Note: using the loopback interface is not as performant; otherwise, some replicas could starve for tasks, which could lead to even slower query execution and not utilizing all possible resources.
The initialization of the coordinator is now even more lazy. All incoming requests contain the information about the reading algorithm, and we initialize the coordinator with it when the first request comes. If any replica decides to read with a different algorithm, an exception will be thrown and the query will be aborted. [#48246](https://github.com/ClickHouse/ClickHouse/pull/48246) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Do not build a set for the right side of an `IN` clause with a subquery when it is used only for analysis of skip indexes, and they are disabled by setting (`use_skip_indexes=0`). Previously it might affect the performance of queries. [#48299](https://github.com/ClickHouse/ClickHouse/pull/48299) ([Anton Popov](https://github.com/CurtizJ)).
+* Query processing is parallelized right after reading `FROM file(...)`. Related to [#38755](https://github.com/ClickHouse/ClickHouse/issues/38755). [#48525](https://github.com/ClickHouse/ClickHouse/pull/48525) ([Igor Nikonov](https://github.com/devcrafter)). Query processing is parallelized right after reading from any data source. Affected data sources are mostly simple or external storages like the table functions `url` and `file`. [#48727](https://github.com/ClickHouse/ClickHouse/pull/48727) ([Igor Nikonov](https://github.com/devcrafter)). This is controlled by the setting `parallelize_output_from_storages`, which is not enabled by default.
+* Lowered contention of the ThreadPool mutex (may increase performance for a huge amount of small jobs). [#48750](https://github.com/ClickHouse/ClickHouse/pull/48750) ([Sergei Trifonov](https://github.com/serxa)).
+* Reduce memory usage for multiple `ALTER DELETE` mutations. [#48522](https://github.com/ClickHouse/ClickHouse/pull/48522) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Remove the excessive connection attempts if the `skip_unavailable_shards` setting is enabled. [#48771](https://github.com/ClickHouse/ClickHouse/pull/48771) ([Azat Khuzhin](https://github.com/azat)).
+
+#### Experimental Feature
+* Entries in the query cache are now squashed to max_block_size and compressed. [#45912](https://github.com/ClickHouse/ClickHouse/pull/45912) ([Robert Schulze](https://github.com/rschu1ze)).
+* It is now possible to define per-user quotas in the query cache. [#48284](https://github.com/ClickHouse/ClickHouse/pull/48284) ([Robert Schulze](https://github.com/rschu1ze)).
+* Some fixes for parallel replicas. [#48433](https://github.com/ClickHouse/ClickHouse/pull/48433) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Implement zero-copy replication (an experimental feature) on encrypted disks. [#48741](https://github.com/ClickHouse/ClickHouse/pull/48741) ([Vitaly Baranov](https://github.com/vitlibar)).
+
+#### Improvement
+* Increase the default value for `connect_timeout_with_failover_ms` to 1000 ms (because of adding async connections in https://github.com/ClickHouse/ClickHouse/pull/47229). Closes [#5188](https://github.com/ClickHouse/ClickHouse/issues/5188). [#49009](https://github.com/ClickHouse/ClickHouse/pull/49009) ([Kruglov Pavel](https://github.com/Avogar)).
+* Several improvements around data lakes: - Make `Iceberg` work with non-partitioned data.
- Support `Iceberg` format version v2 (previously only v1 was supported) - Support reading partitioned data for `DeltaLake`/`Hudi` - Faster reading of `DeltaLake` metadata by using Delta's checkpoint files - Fixed incorrect `Hudi` reads: previously it incorrectly chose which data to read and therefore was able to read correctly only small tables - Made these engines pick up updates of changed data (previously the state was set on table creation) - Make proper testing for `Iceberg`/`DeltaLake`/`Hudi` using Spark. [#47307](https://github.com/ClickHouse/ClickHouse/pull/47307) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add async connection to socket and async writing to socket. Make creating connections and sending query/external tables async across shards. Refactor code with fibers. Closes [#46931](https://github.com/ClickHouse/ClickHouse/issues/46931). We will be able to increase `connect_timeout_with_failover_ms` by default after this PR (https://github.com/ClickHouse/ClickHouse/issues/5188). [#47229](https://github.com/ClickHouse/ClickHouse/pull/47229) ([Kruglov Pavel](https://github.com/Avogar)).
+* Support config sections `keeper`/`keeper_server` as an alternative to `zookeeper`. Closes [#34766](https://github.com/ClickHouse/ClickHouse/issues/34766), [#34767](https://github.com/ClickHouse/ClickHouse/issues/34767). [#35113](https://github.com/ClickHouse/ClickHouse/pull/35113) ([李扬](https://github.com/taiyang-li)).
+* It is possible to set the _secure_ flag in named_collections for a dictionary with a ClickHouse table source. Addresses [#38450](https://github.com/ClickHouse/ClickHouse/issues/38450). [#46323](https://github.com/ClickHouse/ClickHouse/pull/46323) ([Ilya Golshtein](https://github.com/ilejn)).
+* The `bitCount` function now supports the `FixedString` and `String` data types. [#49044](https://github.com/ClickHouse/ClickHouse/pull/49044) ([flynn](https://github.com/ucasfl)).
+* Added configurable retries for all operations with [Zoo]Keeper for Backup queries. [#47224](https://github.com/ClickHouse/ClickHouse/pull/47224) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Enable `use_environment_credentials` for S3 by default, so the entire provider chain is constructed by default. [#47397](https://github.com/ClickHouse/ClickHouse/pull/47397) ([Antonio Andelic](https://github.com/antonio2368)).
+* Currently, the JSON_VALUE function is similar to Spark's get_json_object function, which supports getting a value from a JSON string by a path like '$.key'. There are still some differences: 1. Spark's get_json_object returns null when the path does not exist, while JSON_VALUE returns an empty string; 2. Spark's get_json_object can return a complex value, such as a JSON object/array, while JSON_VALUE returns an empty string. [#47494](https://github.com/ClickHouse/ClickHouse/pull/47494) ([KevinyhZou](https://github.com/KevinyhZou)).
+* More flexible insert-table structure propagation to table functions with `use_structure_from_insertion_table_in_table_functions`. Fixed an issue with name mapping and using virtual columns. No more need for the 'auto' setting. [#47962](https://github.com/ClickHouse/ClickHouse/pull/47962) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Do not continue retrying to connect to Keeper if the query is killed or over limits. [#47985](https://github.com/ClickHouse/ClickHouse/pull/47985) ([Raúl Marín](https://github.com/Algunenano)).
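To make the JSON_VALUE comparison in the entry above concrete, here is a minimal sketch (the literals are illustrative only; the results follow the entry's own description of the behavior):

```sql
SELECT
    JSON_VALUE('{"a": "hello", "b": [1, 2]}', '$.a') AS existing_key,  -- 'hello'
    JSON_VALUE('{"a": "hello", "b": [1, 2]}', '$.c') AS missing_key,   -- '' (Spark's get_json_object returns null here)
    JSON_VALUE('{"a": "hello", "b": [1, 2]}', '$.b') AS complex_value  -- '' (Spark's get_json_object returns the JSON array itself)
```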
+* Support Enum output/input in `BSONEachRow`, allow all map key types and avoid extra calculations on output. [#48122](https://github.com/ClickHouse/ClickHouse/pull/48122) ([Kruglov Pavel](https://github.com/Avogar)).
+* Support more ClickHouse types in `ORC`/`Arrow`/`Parquet` formats: Enum(8|16), (U)Int(128|256), Decimal256 (for ORC), allow reading IPv4 from Int32 values (ORC outputs IPv4 as Int32, and we couldn't read it back), fix reading Nullable(IPv6) from binary data for `ORC`. [#48126](https://github.com/ClickHouse/ClickHouse/pull/48126) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add columns `perform_ttl_move_on_insert`, `load_balancing` for table `system.storage_policies`, modify column `volume_type` type to `Enum8`. [#48167](https://github.com/ClickHouse/ClickHouse/pull/48167) ([lizhuoyu5](https://github.com/lzydmxy)).
+* Added support for the `BACKUP ALL` command which backs up all tables and databases, including temporary and system ones. [#48189](https://github.com/ClickHouse/ClickHouse/pull/48189) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Function mapFromArrays supports `Map` type as an input. [#48207](https://github.com/ClickHouse/ClickHouse/pull/48207) ([李扬](https://github.com/taiyang-li)).
+* The output of `SHOW PROCESSLIST` is now sorted. [#48241](https://github.com/ClickHouse/ClickHouse/pull/48241) ([Robert Schulze](https://github.com/rschu1ze)).
+* Per-query/per-server throttling for remote IO/local IO/BACKUPs (server settings: `max_remote_read_network_bandwidth_for_server`, `max_remote_write_network_bandwidth_for_server`, `max_local_read_bandwidth_for_server`, `max_local_write_bandwidth_for_server`, `max_backup_bandwidth_for_server`; settings: `max_remote_read_network_bandwidth`, `max_remote_write_network_bandwidth`, `max_local_read_bandwidth`, `max_local_write_bandwidth`, `max_backup_bandwidth`). [#48242](https://github.com/ClickHouse/ClickHouse/pull/48242) ([Azat Khuzhin](https://github.com/azat)).
+* Support more types in `CapnProto` format: Map, (U)Int(128|256), Decimal(128|256). Allow integer conversions during input/output. [#48257](https://github.com/ClickHouse/ClickHouse/pull/48257) ([Kruglov Pavel](https://github.com/Avogar)).
+* Don't throw CURRENT_WRITE_BUFFER_IS_EXHAUSTED for normal behaviour. [#48288](https://github.com/ClickHouse/ClickHouse/pull/48288) ([Raúl Marín](https://github.com/Algunenano)).
+* Add a new setting `keeper_map_strict_mode` which enforces extra guarantees on operations made on top of `KeeperMap` tables. [#48293](https://github.com/ClickHouse/ClickHouse/pull/48293) ([Antonio Andelic](https://github.com/antonio2368)).
+* Check that the primary key type for a simple dictionary is a native unsigned integer type. Add setting `check_dictionary_primary_key` for compatibility (set `check_dictionary_primary_key = false` to disable the check). [#48335](https://github.com/ClickHouse/ClickHouse/pull/48335) ([lizhuoyu5](https://github.com/lzydmxy)).
+* Don't replicate mutations for `KeeperMap` because it's unnecessary. [#48354](https://github.com/ClickHouse/ClickHouse/pull/48354) ([Antonio Andelic](https://github.com/antonio2368)).
+* Allow writing/reading an unnamed tuple as a nested Message in Protobuf format. Tuple elements and Message fields are matched by position. [#48390](https://github.com/ClickHouse/ClickHouse/pull/48390) ([Kruglov Pavel](https://github.com/Avogar)).
+* Support `additional_table_filters` and `additional_result_filter` settings in the new planner. Also, add a documentation entry for `additional_result_filter`.
[#48405](https://github.com/ClickHouse/ClickHouse/pull/48405) ([Dmitry Novik](https://github.com/novikd)). +* `parseDateTime` now understands format string '%f' (fractional seconds). [#48420](https://github.com/ClickHouse/ClickHouse/pull/48420) ([Robert Schulze](https://github.com/rschu1ze)). +* Format string "%f" in formatDateTime() now prints "000000" if the formatted value has no fractional seconds, the previous behavior (single zero) can be restored using setting "formatdatetime_f_prints_single_zero = 1". [#48422](https://github.com/ClickHouse/ClickHouse/pull/48422) ([Robert Schulze](https://github.com/rschu1ze)). +* Don't replicate DELETE and TRUNCATE for KeeperMap. [#48434](https://github.com/ClickHouse/ClickHouse/pull/48434) ([Antonio Andelic](https://github.com/antonio2368)). +* Generate valid Decimals and Bools in generateRandom function. [#48436](https://github.com/ClickHouse/ClickHouse/pull/48436) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow trailing commas in expression list of SELECT query, for example `SELECT a, b, c, FROM table`. Closes [#37802](https://github.com/ClickHouse/ClickHouse/issues/37802). [#48438](https://github.com/ClickHouse/ClickHouse/pull/48438) ([Nikolay Degterinsky](https://github.com/evillique)). +* Override `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables with `--user` and `--password` client parameters. Closes [#38909](https://github.com/ClickHouse/ClickHouse/issues/38909). [#48440](https://github.com/ClickHouse/ClickHouse/pull/48440) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added retries to loading of data parts in `MergeTree` tables in case of retryable errors. [#48442](https://github.com/ClickHouse/ClickHouse/pull/48442) ([Anton Popov](https://github.com/CurtizJ)). +* Add support for `Date`, `Date32`, `DateTime`, `DateTime64` data types to `arrayMin`, `arrayMax`, `arrayDifference` functions. Closes [#21645](https://github.com/ClickHouse/ClickHouse/issues/21645). [#48445](https://github.com/ClickHouse/ClickHouse/pull/48445) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add support for `{server_uuid}` macro. It is useful for identifying replicas in autoscaled clusters when new replicas are constantly added and removed in runtime. This closes [#48554](https://github.com/ClickHouse/ClickHouse/issues/48554). [#48563](https://github.com/ClickHouse/ClickHouse/pull/48563) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The installation script will create a hard link instead of copying if it is possible. [#48578](https://github.com/ClickHouse/ClickHouse/pull/48578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support `SHOW TABLE` syntax meaning the same as `SHOW CREATE TABLE`. Closes [#48580](https://github.com/ClickHouse/ClickHouse/issues/48580). [#48591](https://github.com/ClickHouse/ClickHouse/pull/48591) ([flynn](https://github.com/ucasfl)). +* HTTP temporary buffers now support working by evicting data from the virtual filesystem cache. [#48664](https://github.com/ClickHouse/ClickHouse/pull/48664) ([Vladimir C](https://github.com/vdimir)). +* Make Schema inference works for `CREATE AS SELECT`. Closes [#47599](https://github.com/ClickHouse/ClickHouse/issues/47599). [#48679](https://github.com/ClickHouse/ClickHouse/pull/48679) ([flynn](https://github.com/ucasfl)). +* Added a `replicated_max_mutations_in_one_entry` setting for `ReplicatedMergeTree` that allows limiting the number of mutation commands per one `MUTATE_PART` entry (default is 10000). 
[#48731](https://github.com/ClickHouse/ClickHouse/pull/48731) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* In AggregateFunction types, don't count unused arena bytes as `read_bytes`. [#48745](https://github.com/ClickHouse/ClickHouse/pull/48745) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix some MySQL-related settings not being handled with the MySQL dictionary source + named collection. Closes [#48402](https://github.com/ClickHouse/ClickHouse/issues/48402). [#48759](https://github.com/ClickHouse/ClickHouse/pull/48759) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* If a user sets `max_single_part_upload_size` to a very large value, it can lead to a crash due to a bug in the AWS S3 SDK. This fixes [#47679](https://github.com/ClickHouse/ClickHouse/issues/47679). [#48816](https://github.com/ClickHouse/ClickHouse/pull/48816) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix data race in `RabbitMQ` ([report](https://pastila.nl/?004f7100/de1505289ab5bb355e67ebe6c7cc8707)), refactor the code. [#48845](https://github.com/ClickHouse/ClickHouse/pull/48845) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add aliases `name` and `part_name` from `system.parts` and `system.part_log`. Closes [#48718](https://github.com/ClickHouse/ClickHouse/issues/48718). [#48850](https://github.com/ClickHouse/ClickHouse/pull/48850) ([sichenzhao](https://github.com/sichenzhao)).
+* Functions "arrayDifferenceSupport()", "arrayCumSum()" and "arrayCumSumNonNegative()" now support input arrays of wide integer types (U)Int128/256. [#48866](https://github.com/ClickHouse/ClickHouse/pull/48866) ([cluster](https://github.com/infdahai)).
+* Multi-line history in clickhouse-client is now no longer padded. This makes pasting more natural. [#48870](https://github.com/ClickHouse/ClickHouse/pull/48870) ([Joanna Hulboj](https://github.com/jh0x)).
+* Implement a slight improvement for the rare case when ClickHouse is run inside LXC and LXCFS is used. LXCFS has an issue: sometimes it returns an error "Transport endpoint is not connected" on reading from a file inside `/proc`. This error was correctly logged into ClickHouse's server log. We additionally work around this issue by reopening the file. This is a minuscule change. [#48922](https://github.com/ClickHouse/ClickHouse/pull/48922) ([Real](https://github.com/RunningXie)).
+* Improve memory accounting for prefetches. Randomise prefetch settings in CI. [#48973](https://github.com/ClickHouse/ClickHouse/pull/48973) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Correctly set headers for native copy operations on GCS. [#48981](https://github.com/ClickHouse/ClickHouse/pull/48981) ([Antonio Andelic](https://github.com/antonio2368)).
+* Add support for specifying setting names in the command line with dashes instead of underscores, for example, `--max-threads` instead of `--max_threads`. Additionally, support Unicode dash characters like `—` instead of `--` - this is useful when you communicate with a team in another company, and a manager from that team copy-pasted code from MS Word. [#48985](https://github.com/ClickHouse/ClickHouse/pull/48985) ([alekseygolub](https://github.com/alekseygolub)).
+* Add fallback to password authentication when authentication with SSL user certificate has failed. Closes [#48974](https://github.com/ClickHouse/ClickHouse/issues/48974). [#48989](https://github.com/ClickHouse/ClickHouse/pull/48989) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Improve the embedded dashboard.
Close [#46671](https://github.com/ClickHouse/ClickHouse/issues/46671). [#49036](https://github.com/ClickHouse/ClickHouse/pull/49036) ([Kevin Zhang](https://github.com/Kinzeng)). +* Add profile events for log messages, so you can easily see the count of log messages by severity. [#49042](https://github.com/ClickHouse/ClickHouse/pull/49042) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* In previous versions, the `LineAsString` format worked inconsistently when the parallel parsing was enabled or not, in presence of DOS or macOS Classic line breaks. This closes [#49039](https://github.com/ClickHouse/ClickHouse/issues/49039). [#49052](https://github.com/ClickHouse/ClickHouse/pull/49052) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The exception message about the unparsed query parameter will also tell about the name of the parameter. Reimplement [#48878](https://github.com/ClickHouse/ClickHouse/issues/48878). Close [#48772](https://github.com/ClickHouse/ClickHouse/issues/48772). [#49061](https://github.com/ClickHouse/ClickHouse/pull/49061) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Build/Testing/Packaging Improvement +* Update time zones. The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Reduce the number of dependencies in the header files to speed up the build. [#47984](https://github.com/ClickHouse/ClickHouse/pull/47984) ([Dmitry Novik](https://github.com/novikd)). +* Randomize compression of marks and indices in tests. [#48286](https://github.com/ClickHouse/ClickHouse/pull/48286) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Bump internal ZSTD from 1.5.4 to 1.5.5. [#46797](https://github.com/ClickHouse/ClickHouse/pull/46797) ([Robert Schulze](https://github.com/rschu1ze)). +* Randomize vertical merges from compact to wide parts in tests. [#48287](https://github.com/ClickHouse/ClickHouse/pull/48287) ([Raúl Marín](https://github.com/Algunenano)). +* Support for CRC32 checksum in HDFS. Fix performance issues. [#48614](https://github.com/ClickHouse/ClickHouse/pull/48614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove remainders of GCC support. [#48671](https://github.com/ClickHouse/ClickHouse/pull/48671) ([Robert Schulze](https://github.com/rschu1ze)). +* Add CI run with new analyzer infrastructure enabled. [#48719](https://github.com/ClickHouse/ClickHouse/pull/48719) ([Dmitry Novik](https://github.com/novikd)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix system.query_views_log for MVs that are pushed from background threads [#46668](https://github.com/ClickHouse/ClickHouse/pull/46668) ([Azat Khuzhin](https://github.com/azat)). +* Fix several `RENAME COLUMN` bugs [#46946](https://github.com/ClickHouse/ClickHouse/pull/46946) ([alesapin](https://github.com/alesapin)). +* Fix minor hiliting issues in clickhouse-format [#47610](https://github.com/ClickHouse/ClickHouse/pull/47610) ([Natasha Murashkina](https://github.com/murfel)). 
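A side note on the log-message profile events added in [#49042] above: they land in `system.events` like any other profile event, so the per-severity counts can be read with a query along these lines (a sketch; the entry does not spell out the event names, so the `Log` prefix used here is an assumption):

```sql
SELECT event, value
FROM system.events
WHERE event LIKE 'Log%'  -- e.g. LogError, LogWarning (assumed names)
ORDER BY value DESC
```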
+* Fix a bug in LLVM's libc++ leading to a crash for uploading parts to S3 which size is greater than INT_MAX [#47693](https://github.com/ClickHouse/ClickHouse/pull/47693) ([Azat Khuzhin](https://github.com/azat)). +* Fix overflow in the `sparkbar` function [#48121](https://github.com/ClickHouse/ClickHouse/pull/48121) ([Vladimir C](https://github.com/vdimir)). +* Fix race in S3 [#48190](https://github.com/ClickHouse/ClickHouse/pull/48190) ([Anton Popov](https://github.com/CurtizJ)). +* Disable JIT for aggregate functions due to inconsistent behavior [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix alter formatting (minor) [#48289](https://github.com/ClickHouse/ClickHouse/pull/48289) ([Natasha Murashkina](https://github.com/murfel)). +* Fix CPU usage in RabbitMQ (was worsened in 23.2 after [#44404](https://github.com/ClickHouse/ClickHouse/issues/44404)) [#48311](https://github.com/ClickHouse/ClickHouse/pull/48311) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix crash in EXPLAIN PIPELINE for Merge over Distributed [#48320](https://github.com/ClickHouse/ClickHouse/pull/48320) ([Azat Khuzhin](https://github.com/azat)). +* Fix serializing LowCardinality as Arrow dictionary [#48361](https://github.com/ClickHouse/ClickHouse/pull/48361) ([Kruglov Pavel](https://github.com/Avogar)). +* Reset downloader for cache file segment in TemporaryFileStream [#48386](https://github.com/ClickHouse/ClickHouse/pull/48386) ([Vladimir C](https://github.com/vdimir)). +* Fix possible SYSTEM SYNC REPLICA stuck in case of DROP/REPLACE PARTITION [#48391](https://github.com/ClickHouse/ClickHouse/pull/48391) ([Azat Khuzhin](https://github.com/azat)). +* Fix a startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Don't check dependencies when renaming system tables automatically [#48431](https://github.com/ClickHouse/ClickHouse/pull/48431) ([Raúl Marín](https://github.com/Algunenano)). +* Update only affected rows in KeeperMap storage [#48435](https://github.com/ClickHouse/ClickHouse/pull/48435) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix possible segfault in the VFS cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* `toTimeZone` function throws an error when no constant string is provided [#48471](https://github.com/ClickHouse/ClickHouse/pull/48471) ([Jordi Villar](https://github.com/jrdi)). +* Fix logical error with IPv4 in Protobuf, add support for Date32 [#48486](https://github.com/ClickHouse/ClickHouse/pull/48486) ([Kruglov Pavel](https://github.com/Avogar)). +* "changed" flag in system.settings was calculated incorrectly for settings with multiple values [#48516](https://github.com/ClickHouse/ClickHouse/pull/48516) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix storage `Memory` with enabled compression [#48517](https://github.com/ClickHouse/ClickHouse/pull/48517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bracketed-paste mode messing up password input in the event of client reconnection [#48528](https://github.com/ClickHouse/ClickHouse/pull/48528) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). 
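A minimal sketch of the stricter `toTimeZone` behavior from [#48471] above; `materialize` is used here only to fabricate a non-constant argument:

```sql
SELECT toTimeZone(now(), 'Europe/Berlin');        -- OK: the time zone is a constant string
-- SELECT toTimeZone(now(), materialize('UTC'));  -- now throws: the time zone is not constant
```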
+* Fix an uncaught exception in case of parallel loader for hashed dictionaries [#48571](https://github.com/ClickHouse/ClickHouse/pull/48571) ([Azat Khuzhin](https://github.com/azat)). +* The `groupArray` aggregate function correctly works for empty result over nullable types [#48593](https://github.com/ClickHouse/ClickHouse/pull/48593) ([lgbo](https://github.com/lgbo-ustc)). +* Fix bug in Keeper when a node is not created with scheme `auth` in ACL sometimes. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Allow IPv4 comparison operators with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix possible error from cache [#48636](https://github.com/ClickHouse/ClickHouse/pull/48636) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Async inserts with empty data will no longer throw exception. [#48663](https://github.com/ClickHouse/ClickHouse/pull/48663) ([Anton Popov](https://github.com/CurtizJ)). +* Fix table dependencies in case of failed RENAME TABLE [#48683](https://github.com/ClickHouse/ClickHouse/pull/48683) ([Azat Khuzhin](https://github.com/azat)). +* If the primary key has duplicate columns (which is only possible for projections), in previous versions it might lead to a bug [#48838](https://github.com/ClickHouse/ClickHouse/pull/48838) ([Amos Bird](https://github.com/amosbird)). +* Fix for a race condition in ZooKeeper when joining send_thread/receive_thread [#48849](https://github.com/ClickHouse/ClickHouse/pull/48849) ([Alexander Gololobov](https://github.com/davenger)). +* Fix unexpected part name error when trying to drop a ignored detached part with zero copy replication [#48862](https://github.com/ClickHouse/ClickHouse/pull/48862) ([Michael Lex](https://github.com/mlex)). +* Fix reading `Date32` Parquet/Arrow column into not a `Date32` column [#48864](https://github.com/ClickHouse/ClickHouse/pull/48864) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `UNKNOWN_IDENTIFIER` error while selecting from table with row policy and column with dots [#48976](https://github.com/ClickHouse/ClickHouse/pull/48976) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix aggregation by empty nullable strings [#48999](https://github.com/ClickHouse/ClickHouse/pull/48999) ([LiuNeng](https://github.com/liuneng1994)). + ### ClickHouse release 23.3 LTS, 2023-03-30 #### Upgrade Notes diff --git a/CMakeLists.txt b/CMakeLists.txt index ce615c11f2b..0554403cce5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -58,7 +58,7 @@ if (ENABLE_CHECK_HEAVY_BUILDS) set (RLIMIT_CPU 1000) # gcc10/gcc10/clang -fsanitize=memory is too heavy - if (SANITIZE STREQUAL "memory" OR COMPILER_GCC) + if (SANITIZE STREQUAL "memory") set (RLIMIT_DATA 10000000000) # 10G endif() @@ -286,48 +286,31 @@ set (CMAKE_C_STANDARD 11) set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C set (CMAKE_C_STANDARD_REQUIRED ON) -if (COMPILER_GCC OR COMPILER_CLANG) - # Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure. - # See https://reviews.llvm.org/D112921 - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation") -endif () - -# falign-functions=32 prevents from random performance regressions with the code change. Thus, providing more stable -# benchmarks. 
-if (COMPILER_GCC OR COMPILER_CLANG) - set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=32") -endif () - -if (ARCH_AMD64) - # align branches within a 32-Byte boundary to avoid the potential performance loss when code layout change, - # which makes benchmark results more stable. - set(BRANCHES_WITHIN_32B_BOUNDARIES "-mbranches-within-32B-boundaries") - if (COMPILER_GCC) - # gcc is in assembler, need to add "-Wa," prefix - set(BRANCHES_WITHIN_32B_BOUNDARIES "-Wa,${BRANCHES_WITHIN_32B_BOUNDARIES}") - endif() - - set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}") -endif() - -if (COMPILER_GCC) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fcoroutines") -endif () - # Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF) -if (WITH_COVERAGE AND COMPILER_CLANG) - set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping") - # If we want to disable coverage for specific translation units - set(WITHOUT_COVERAGE "-fno-profile-instr-generate -fno-coverage-mapping") -endif() +if (COMPILER_CLANG) + # Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure. + # See https://reviews.llvm.org/D112921 + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation") -if (WITH_COVERAGE AND COMPILER_GCC) - set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-arcs -ftest-coverage") - set(COVERAGE_OPTION "-lgcov") - set(WITHOUT_COVERAGE "-fno-profile-arcs -fno-test-coverage") -endif() + # falign-functions=32 prevents from random performance regressions with the code change. Thus, providing more stable + # benchmarks. + set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=32") + + if (ARCH_AMD64) + # align branches within a 32-Byte boundary to avoid the potential performance loss when code layout change, + # which makes benchmark results more stable. + set(BRANCHES_WITHIN_32B_BOUNDARIES "-mbranches-within-32B-boundaries") + set(COMPILER_FLAGS "${COMPILER_FLAGS} ${BRANCHES_WITHIN_32B_BOUNDARIES}") + endif() + + if (WITH_COVERAGE) + set(COMPILER_FLAGS "${COMPILER_FLAGS} -fprofile-instr-generate -fcoverage-mapping") + # If we want to disable coverage for specific translation units + set(WITHOUT_COVERAGE "-fno-profile-instr-generate -fno-coverage-mapping") + endif() +endif () set (COMPILER_FLAGS "${COMPILER_FLAGS}") @@ -410,7 +393,11 @@ else() endif () option (ENABLE_GWP_ASAN "Enable Gwp-Asan" ON) -if (NOT OS_LINUX AND NOT OS_ANDROID) +# We use mmap for allocations more heavily in debug builds, +# but GWP-ASan also wants to use mmap frequently, +# and due to a large number of memory mappings, +# it does not work together well. +if ((NOT OS_LINUX AND NOT OS_ANDROID) OR (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")) set(ENABLE_GWP_ASAN OFF) endif () @@ -434,8 +421,11 @@ endif () set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX") -set (CMAKE_POSITION_INDEPENDENT_CODE OFF) -if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X)) +if (NOT SANITIZE) + set (CMAKE_POSITION_INDEPENDENT_CODE OFF) +endif() + +if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X) AND NOT SANITIZE) # Slightly more efficient code can be generated # It's disabled for ARM because otherwise ClickHouse cannot run on Android. 
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie") diff --git a/README.md b/README.md index 61d840ecd34..c82c64cfd22 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/clickhouse-presentations/raw/master/images/logo-400x240.png)](https://clickhouse.com) +[ClickHouse — open source distributed column-oriented DBMS](https://clickhouse.com?utm_source=github) ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time. @@ -21,10 +21,11 @@ curl https://clickhouse.com/ | sh * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. ## Upcoming Events -* [**ClickHouse Meetup in Austin**](https://www.meetup.com/clickhouse-austin-user-group/events/291486654/) - Mar 30 - The first ClickHouse Meetup in Austin is happening soon! Interested in speaking, let us know! -* [**v23.3 Release Webinar**](https://clickhouse.com/company/events/v23-3-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-02) - Mar 30 - 23.3 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. +* [**ClickHouse Spring Meetup in Manhattan**](https://www.meetup.com/clickhouse-new-york-user-group/events/292517734) - April 26 - It's spring, and it's time to meet again in the city! Talks include: "Building a domain specific query language on top of Clickhouse", "A Galaxy of Information", "Our Journey to ClickHouse Cloud from Redshift", and a ClickHouse update! +* [**v23.4 Release Webinar**](https://clickhouse.com/company/events/v23-4-release-webinar?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-04) - April 26 - 23.4 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. +* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/292892466) - May 16 - Save the date! ClickHouse is coming back to Berlin. We’re excited to announce an upcoming ClickHouse Meetup that you won’t want to miss. Join us as we gather together to discuss the latest in the world of ClickHouse and share user stories. ## Recent Recordings -* **FOSDEM 2023**: In the "Fast and Streaming Data" room Alexey gave a talk entitled "Building Analytical Apps With ClickHouse" that looks at the landscape of data tools, an interesting data set, and how you can interact with data quickly. Check out the recording on **[YouTube](https://www.youtube.com/watch?v=JlcI2Vfz_uk)**. -* **Recording available**: [**v23.2 Release Webinar**](https://www.youtube.com/watch?v=2o0vRMMIrkY) NTILE Window Function support, Partition Key for GROUP By, io_uring, Apache Iceberg support, Dynamic Disks, integrations updates! Watch it now! +* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. 
Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" +* **Recording available**: [**v23.3 Release Webinar**](https://www.youtube.com/watch?v=ISaGUjvBNao) UNDROP TABLE, server settings introspection, nested dynamic disks, MySQL compatibility, parseDate Time, Lightweight Deletes, Parallel Replicas, integrations updates, and so much more! Watch it now! * **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU) diff --git a/SECURITY.md b/SECURITY.md index 566a1820834..44a122956b4 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 23.4 | ✔️ | | 23.3 | ✔️ | | 23.2 | ✔️ | -| 23.1 | ✔️ | +| 23.1 | ❌ | | 22.12 | ❌ | | 22.11 | ❌ | | 22.10 | ❌ | diff --git a/base/base/Decimal.h b/base/base/Decimal.h index 22cb577b1b2..2405ba9ca0d 100644 --- a/base/base/Decimal.h +++ b/base/base/Decimal.h @@ -1,5 +1,6 @@ #pragma once #include +#include #if !defined(NO_SANITIZE_UNDEFINED) #if defined(__clang__) @@ -19,23 +20,6 @@ using Decimal64 = Decimal; using Decimal128 = Decimal; using Decimal256 = Decimal; -template -concept is_decimal = - std::is_same_v - || std::is_same_v - || std::is_same_v - || std::is_same_v - || std::is_same_v; - -template -concept is_over_big_int = - std::is_same_v - || std::is_same_v - || std::is_same_v - || std::is_same_v - || std::is_same_v - || std::is_same_v; - template struct NativeTypeT { using Type = T; }; template struct NativeTypeT { using Type = typename T::NativeType; }; template using NativeType = typename NativeTypeT::Type; diff --git a/base/base/Decimal_fwd.h b/base/base/Decimal_fwd.h new file mode 100644 index 00000000000..589d6224917 --- /dev/null +++ b/base/base/Decimal_fwd.h @@ -0,0 +1,46 @@ +#pragma once + +#include + +namespace wide +{ + +template +class integer; + +} + +using Int128 = wide::integer<128, signed>; +using UInt128 = wide::integer<128, unsigned>; +using Int256 = wide::integer<256, signed>; +using UInt256 = wide::integer<256, unsigned>; + +namespace DB +{ + +template struct Decimal; + +using Decimal32 = Decimal; +using Decimal64 = Decimal; +using Decimal128 = Decimal; +using Decimal256 = Decimal; + +class DateTime64; + +template +concept is_decimal = + std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v; + +template +concept is_over_big_int = + std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v + || std::is_same_v; +} diff --git a/base/base/IPv4andIPv6.h b/base/base/IPv4andIPv6.h index 0e97d83b07e..7b745ec7b84 100644 --- a/base/base/IPv4andIPv6.h +++ b/base/base/IPv4andIPv6.h @@ -51,3 +51,15 @@ namespace DB }; } + +namespace std +{ + template <> + struct hash + { + size_t operator()(const DB::IPv6 & x) const + { + return std::hash()(x.toUnderType()); + } + }; +} diff --git a/base/base/argsToConfig.cpp b/base/base/argsToConfig.cpp index d7983779d2d..faa1462218d 100644 --- a/base/base/argsToConfig.cpp +++ b/base/base/argsToConfig.cpp @@ -3,13 +3,29 @@ #include #include - -void argsToConfig(const Poco::Util::Application::ArgVec & argv, Poco::Util::LayeredConfiguration & config, int priority) +void argsToConfig(const Poco::Util::Application::ArgVec & argv, + Poco::Util::LayeredConfiguration & config, + int priority, + const std::unordered_set* alias_names) { /// Parsing all 
args and converting to config layer /// Test: -- --1=1 --1=2 --3 5 7 8 -9 10 -11=12 14= 15== --16==17 --=18 --19= --20 21 22 --23 --24 25 --26 -27 28 ---29=30 -- ----31 32 --33 3-4 Poco::AutoPtr map_config = new Poco::Util::MapConfiguration; std::string key; + + auto add_arg = [&map_config, &alias_names](const std::string & k, const std::string & v) + { + map_config->setString(k, v); + + if (alias_names && !alias_names->contains(k)) + { + std::string alias_key = k; + std::replace(alias_key.begin(), alias_key.end(), '-', '_'); + if (alias_names->contains(alias_key)) + map_config->setString(alias_key, v); + } + }; + for (const auto & arg : argv) { auto key_start = arg.find_first_not_of('-'); @@ -19,7 +35,7 @@ void argsToConfig(const Poco::Util::Application::ArgVec & argv, Poco::Util::Laye // old saved '--key', will set to some true value "1" if (!key.empty() && pos_minus != std::string::npos && pos_minus < key_start) { - map_config->setString(key, "1"); + add_arg(key, "1"); key = ""; } @@ -29,7 +45,7 @@ void argsToConfig(const Poco::Util::Application::ArgVec & argv, Poco::Util::Laye { if (pos_minus == std::string::npos || pos_minus > key_start) { - map_config->setString(key, arg); + add_arg(key, arg); } key = ""; } @@ -55,7 +71,7 @@ void argsToConfig(const Poco::Util::Application::ArgVec & argv, Poco::Util::Laye if (arg.size() > pos_eq) value = arg.substr(pos_eq + 1); - map_config->setString(key, value); + add_arg(key, value); key = ""; } diff --git a/base/base/argsToConfig.h b/base/base/argsToConfig.h index 9b7b44b7b7f..ef34a8a2145 100644 --- a/base/base/argsToConfig.h +++ b/base/base/argsToConfig.h @@ -1,6 +1,8 @@ #pragma once #include +#include +#include namespace Poco::Util { @@ -8,4 +10,7 @@ class LayeredConfiguration; // NOLINT(cppcoreguidelines-virtual-class-destructor } /// Import extra command line arguments to configuration. These are command line arguments after --. -void argsToConfig(const Poco::Util::Application::ArgVec & argv, Poco::Util::LayeredConfiguration & config, int priority); +void argsToConfig(const Poco::Util::Application::ArgVec & argv, + Poco::Util::LayeredConfiguration & config, + int priority, + const std::unordered_set* registered_alias_names = nullptr); diff --git a/base/base/find_symbols.h b/base/base/find_symbols.h index fe5d3bbadab..a8747ecc9b7 100644 --- a/base/base/find_symbols.h +++ b/base/base/find_symbols.h @@ -34,10 +34,52 @@ * If no such characters, returns nullptr. 
*/ +struct SearchSymbols +{ + static constexpr auto BUFFER_SIZE = 16; + + SearchSymbols() = default; + + explicit SearchSymbols(std::string in) + : str(std::move(in)) + { +#if defined(__SSE4_2__) + if (str.size() > BUFFER_SIZE) + { + throw std::runtime_error("SearchSymbols can contain at most " + std::to_string(BUFFER_SIZE) + " symbols and " + std::to_string(str.size()) + " was provided\n"); + } + + char tmp_safety_buffer[BUFFER_SIZE] = {0}; + + memcpy(tmp_safety_buffer, str.data(), str.size()); + + simd_vector = _mm_loadu_si128(reinterpret_cast(tmp_safety_buffer)); +#endif + } + +#if defined(__SSE4_2__) + __m128i simd_vector; +#endif + std::string str; +}; + namespace detail { template constexpr bool is_in(char x) { return ((x == chars) || ...); } // NOLINT(misc-redundant-expression) +static bool is_in(char c, const char * symbols, size_t num_chars) +{ + for (size_t i = 0u; i < num_chars; ++i) + { + if (c == symbols[i]) + { + return true; + } + } + + return false; +} + #if defined(__SSE2__) template inline __m128i mm_is_in(__m128i bytes) @@ -53,6 +95,43 @@ inline __m128i mm_is_in(__m128i bytes) __m128i eq = mm_is_in(bytes); return _mm_or_si128(eq0, eq); } + +inline __m128i mm_is_in(__m128i bytes, const char * symbols, size_t num_chars) +{ + __m128i accumulator = _mm_setzero_si128(); + for (size_t i = 0; i < num_chars; ++i) + { + __m128i eq = _mm_cmpeq_epi8(bytes, _mm_set1_epi8(symbols[i])); + accumulator = _mm_or_si128(accumulator, eq); + } + + return accumulator; +} + +inline std::array<__m128i, 16u> mm_is_in_prepare(const char * symbols, size_t num_chars) +{ + std::array<__m128i, 16u> result {}; + + for (size_t i = 0; i < num_chars; ++i) + { + result[i] = _mm_set1_epi8(symbols[i]); + } + + return result; +} + +inline __m128i mm_is_in_execute(__m128i bytes, const std::array<__m128i, 16u> & needles) +{ + __m128i accumulator = _mm_setzero_si128(); + + for (const auto & needle : needles) + { + __m128i eq = _mm_cmpeq_epi8(bytes, needle); + accumulator = _mm_or_si128(accumulator, eq); + } + + return accumulator; +} #endif template @@ -99,6 +178,32 @@ inline const char * find_first_symbols_sse2(const char * const begin, const char return return_mode == ReturnMode::End ? end : nullptr; } +template +inline const char * find_first_symbols_sse2(const char * const begin, const char * const end, const char * symbols, size_t num_chars) +{ + const char * pos = begin; + +#if defined(__SSE2__) + const auto needles = mm_is_in_prepare(symbols, num_chars); + for (; pos + 15 < end; pos += 16) + { + __m128i bytes = _mm_loadu_si128(reinterpret_cast(pos)); + + __m128i eq = mm_is_in_execute(bytes, needles); + + uint16_t bit_mask = maybe_negate(uint16_t(_mm_movemask_epi8(eq))); + if (bit_mask) + return pos + __builtin_ctz(bit_mask); + } +#endif + + for (; pos < end; ++pos) + if (maybe_negate(is_in(*pos, symbols, num_chars))) + return pos; + + return return_mode == ReturnMode::End ? end : nullptr; +} + template inline const char * find_last_symbols_sse2(const char * const begin, const char * const end) @@ -179,6 +284,41 @@ inline const char * find_first_symbols_sse42(const char * const begin, const cha return return_mode == ReturnMode::End ? 
@@ -179,6 +284,41 @@ inline const char * find_first_symbols_sse42(const char * const begin, const cha
     return return_mode == ReturnMode::End ? end : nullptr;
 }

+template <bool positive, ReturnMode return_mode>
+inline const char * find_first_symbols_sse42(const char * const begin, const char * const end, const SearchSymbols & symbols)
+{
+    const char * pos = begin;
+
+    const auto num_chars = symbols.str.size();
+
+#if defined(__SSE4_2__)
+    constexpr int mode = _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT;
+
+    const __m128i set = symbols.simd_vector;
+
+    for (; pos + 15 < end; pos += 16)
+    {
+        __m128i bytes = _mm_loadu_si128(reinterpret_cast<const __m128i *>(pos));
+
+        if constexpr (positive)
+        {
+            if (_mm_cmpestrc(set, num_chars, bytes, 16, mode))
+                return pos + _mm_cmpestri(set, num_chars, bytes, 16, mode);
+        }
+        else
+        {
+            if (_mm_cmpestrc(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY))
+                return pos + _mm_cmpestri(set, num_chars, bytes, 16, mode | _SIDD_NEGATIVE_POLARITY);
+        }
+    }
+#endif
+
+    for (; pos < end; ++pos)
+        if (maybe_negate<positive>(is_in(*pos, symbols.str.data(), num_chars)))
+            return pos;
+
+    return return_mode == ReturnMode::End ? end : nullptr;
+}

 /// NOTE No SSE 4.2 implementation for find_last_symbols_or_null. Not worth to do.

@@ -194,6 +334,17 @@ inline const char * find_first_symbols_dispatch(const char * begin, const char *
     return find_first_symbols_sse2<positive, return_mode, symbols...>(begin, end);
 }

+template <bool positive, ReturnMode return_mode>
+inline const char * find_first_symbols_dispatch(const std::string_view haystack, const SearchSymbols & symbols)
+{
+#if defined(__SSE4_2__)
+    if (symbols.str.size() >= 5)
+        return find_first_symbols_sse42<positive, return_mode>(haystack.begin(), haystack.end(), symbols);
+    else
+#endif
+        return find_first_symbols_sse2<positive, return_mode>(haystack.begin(), haystack.end(), symbols.str.data(), symbols.str.size());
+}
+
 }

@@ -211,6 +362,11 @@ inline char * find_first_symbols(char * begin, char * end)
     return const_cast<char *>(detail::find_first_symbols_dispatch<true, detail::ReturnMode::End, symbols...>(begin, end));
 }

+inline const char * find_first_symbols(std::string_view haystack, const SearchSymbols & symbols)
+{
+    return detail::find_first_symbols_dispatch<true, detail::ReturnMode::End>(haystack, symbols);
+}
+
 template <char... symbols>
 inline const char * find_first_not_symbols(const char * begin, const char * end)
 {
@@ -223,6 +379,11 @@ inline char * find_first_not_symbols(char * begin, char * end)
     return const_cast<char *>(detail::find_first_symbols_dispatch<false, detail::ReturnMode::End, symbols...>(begin, end));
 }

+inline const char * find_first_not_symbols(std::string_view haystack, const SearchSymbols & symbols)
+{
+    return detail::find_first_symbols_dispatch<false, detail::ReturnMode::End>(haystack, symbols);
+}
+
 template <char... symbols>
 inline const char * find_first_symbols_or_null(const char * begin, const char * end)
 {
@@ -235,6 +396,11 @@ inline char * find_first_symbols_or_null(char * begin, char * end)
     return const_cast<char *>(detail::find_first_symbols_dispatch<true, detail::ReturnMode::Nullptr, symbols...>(begin, end));
 }

+inline const char * find_first_symbols_or_null(std::string_view haystack, const SearchSymbols & symbols)
+{
+    return detail::find_first_symbols_dispatch<true, detail::ReturnMode::Nullptr>(haystack, symbols);
+}
+
 template <char... symbols>
 inline const char * find_first_not_symbols_or_null(const char * begin, const char * end)
 {
@@ -247,6 +413,10 @@ inline char * find_first_not_symbols_or_null(char * begin, char * end)
     return const_cast<char *>(detail::find_first_symbols_dispatch<false, detail::ReturnMode::Nullptr, symbols...>(begin, end));
 }

+inline const char * find_first_not_symbols_or_null(std::string_view haystack, const SearchSymbols & symbols)
+{
+    return detail::find_first_symbols_dispatch<false, detail::ReturnMode::Nullptr>(haystack, symbols);
+}

 template <char... symbols>
 inline const char * find_last_symbols_or_null(const char * begin, const char * end)
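Unlike the existing `find_first_symbols<'a', 'b'>(begin, end)` template form, the new overloads take the symbol set as a runtime value, so one `SearchSymbols` object can be built once and reused across calls. A minimal usage sketch under those assumptions (standalone, include path illustrative):

#include <base/find_symbols.h>

#include <iostream>
#include <string_view>

int main()
{
    const SearchSymbols delimiters(":,");  /// constructed once, reusable
    constexpr std::string_view haystack = "key:value";

    /// Returns a pointer to the first ':' or ',', or nullptr if none is present.
    if (const char * pos = find_first_symbols_or_null(haystack, delimiters))
        std::cout << "first delimiter at offset " << (pos - haystack.data()) << '\n';  /// prints 3
}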
diff --git a/base/base/unaligned.h b/base/base/unaligned.h
index fcaaa38f2fe..3ab25c803bb 100644
--- a/base/base/unaligned.h
+++ b/base/base/unaligned.h
@@ -5,44 +5,6 @@
 #include

-inline void reverseMemcpy(void * dst, const void * src, size_t size)
-{
-    uint8_t * uint_dst = reinterpret_cast<uint8_t *>(dst);
-    const uint8_t * uint_src = reinterpret_cast<const uint8_t *>(src);
-
-    uint_dst += size;
-    while (size)
-    {
-        --uint_dst;
-        *uint_dst = *uint_src;
-        ++uint_src;
-        --size;
-    }
-}
-
-template <typename T>
-inline T unalignedLoadLE(const void * address)
-{
-    T res {};
-    if constexpr (std::endian::native == std::endian::little)
-        memcpy(&res, address, sizeof(res));
-    else
-        reverseMemcpy(&res, address, sizeof(res));
-    return res;
-}
-
-
-template <typename T>
-inline void unalignedStoreLE(void * address,
-                             const typename std::enable_if<true, T>::type & src)
-{
-    static_assert(std::is_trivially_copyable_v<T>);
-    if constexpr (std::endian::native == std::endian::little)
-        memcpy(address, &src, sizeof(src));
-    else
-        reverseMemcpy(address, &src, sizeof(src));
-}
-
 template <typename T>
 inline T unalignedLoad(const void * address)
 {
@@ -62,3 +24,70 @@ inline void unalignedStore(void * address,
     static_assert(std::is_trivially_copyable_v<T>);
     memcpy(address, &src, sizeof(src));
 }
+
+
+inline void reverseMemcpy(void * dst, const void * src, size_t size)
+{
+    uint8_t * uint_dst = reinterpret_cast<uint8_t *>(dst);
+    const uint8_t * uint_src = reinterpret_cast<const uint8_t *>(src);
+
+    uint_dst += size;
+    while (size)
+    {
+        --uint_dst;
+        *uint_dst = *uint_src;
+        ++uint_src;
+        --size;
+    }
+}
+
+template <std::endian endian, typename T>
+inline T unalignedLoadEndian(const void * address)
+{
+    T res {};
+    if constexpr (std::endian::native == endian)
+        memcpy(&res, address, sizeof(res));
+    else
+        reverseMemcpy(&res, address, sizeof(res));
+    return res;
+}
+
+
+template <std::endian endian, typename T>
+inline void unalignedStoreEndian(void * address, T & src)
+{
+    static_assert(std::is_trivially_copyable_v<T>);
+    if constexpr (std::endian::native == endian)
+        memcpy(address, &src, sizeof(src));
+    else
+        reverseMemcpy(address, &src, sizeof(src));
+}
+
+
+template <typename T>
+inline T unalignedLoadLittleEndian(const void * address)
+{
+    return unalignedLoadEndian<std::endian::little, T>(address);
+}
+
+
+template <typename T>
+inline void unalignedStoreLittleEndian(void * address,
+                                       const typename std::enable_if<true, T>::type & src)
+{
+    unalignedStoreEndian<std::endian::little>(address, src);
+}
+
+template <typename T>
+inline T unalignedLoadBigEndian(const void * address)
+{
+    return unalignedLoadEndian<std::endian::big, T>(address);
+}
+
+
+template <typename T>
+inline void unalignedStoreBigEndian(void * address,
+                                    const typename std::enable_if<true, T>::type & src)
+{
+    unalignedStoreEndian<std::endian::big>(address, src);
+}
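The `Endian` helpers make byte order an explicit template argument and fall back to `reverseMemcpy` when the host order differs, so the little-endian wrappers keep the old `LE` behavior while big-endian targets get symmetric support. A round-trip sketch under those assumptions (standalone, include path illustrative):

#include <base/unaligned.h>

#include <cassert>
#include <cstdint>

int main()
{
    char buf[sizeof(uint32_t)];

    /// T must be spelled out: the enable_if<true, T> parameter blocks deduction on stores.
    unalignedStoreLittleEndian<uint32_t>(buf, 0x11223344U);
    assert(unalignedLoadLittleEndian<uint32_t>(buf) == 0x11223344U);

    unalignedStoreBigEndian<uint32_t>(buf, 0x11223344U);
    assert(static_cast<uint8_t>(buf[0]) == 0x11);  /// most significant byte comes first
}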
diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h
index 7cdb527f9cf..ed4570d5e3f 100644
--- a/base/base/wide_integer_impl.h
+++ b/base/base/wide_integer_impl.h
@@ -155,13 +155,13 @@ struct common_type<wide::integer<Bits, Signed>, Arithmetic>
         std::is_floating_point_v<Arithmetic>,
         Arithmetic,
         std::conditional_t<
-            sizeof(Arithmetic) < Bits * sizeof(long),
+            sizeof(Arithmetic) * 8 < Bits,
             wide::integer<Bits, Signed>,
             std::conditional_t<
-                Bits * sizeof(long) < sizeof(Arithmetic),
+                Bits < sizeof(Arithmetic) * 8,
                 Arithmetic,
                 std::conditional_t<
-                    Bits * sizeof(long) == sizeof(Arithmetic) && (std::is_same_v<Signed, signed> || std::is_signed_v<Arithmetic>),
+                    Bits == sizeof(Arithmetic) * 8 && (std::is_same_v<Signed, signed> || std::is_signed_v<Arithmetic>),
                     Arithmetic,
                     wide::integer<Bits, Signed>>>>>;
 };
diff --git a/base/glibc-compatibility/glibc-compatibility.c b/base/glibc-compatibility/glibc-compatibility.c
index 7e8ea5051d7..49bb81a58be 100644
--- a/base/glibc-compatibility/glibc-compatibility.c
+++ b/base/glibc-compatibility/glibc-compatibility.c
@@ -235,6 +235,17 @@ ssize_t getrandom(void *buf, size_t buflen, unsigned flags)
     return syscall(SYS_getrandom, buf, buflen, flags);
 }

+/* Structure for scatter/gather I/O.  */
+struct iovec
+{
+    void *iov_base;    /* Pointer to data.  */
+    size_t iov_len;    /* Length of data.  */
+};
+
+ssize_t preadv(int __fd, const struct iovec *__iovec, int __count, __off_t __offset)
+{
+    return syscall(SYS_preadv, __fd, __iovec, __count, (long)(__offset), (long)(__offset>>32));
+}

 #include
 #include
diff --git a/base/poco/Crypto/include/Poco/Crypto/ECKeyImpl.h b/base/poco/Crypto/include/Poco/Crypto/ECKeyImpl.h
index 2a72861a84e..d051ef1b768 100644
--- a/base/poco/Crypto/include/Poco/Crypto/ECKeyImpl.h
+++ b/base/poco/Crypto/include/Poco/Crypto/ECKeyImpl.h
@@ -90,20 +90,6 @@ namespace Crypto
        std::string groupName() const;
        /// Returns the EC key group name.

-       void save(const std::string & publicKeyFile, const std::string & privateKeyFile = "", const std::string & privateKeyPassphrase = "")
-           const;
-       /// Exports the public and private keys to the given files.
-       ///
-       /// If an empty filename is specified, the corresponding key
-       /// is not exported.
-
-       void
-       save(std::ostream * pPublicKeyStream, std::ostream * pPrivateKeyStream = 0, const std::string & privateKeyPassphrase = "") const;
-       /// Exports the public and private key to the given streams.
-       ///
-       /// If a null pointer is passed for a stream, the corresponding
-       /// key is not exported.
-
        static std::string getCurveName(int nid = -1);
        /// Returns elliptical curve name corresponding to
        /// the given nid; if nid is not found, returns
@@ -150,22 +136,6 @@ namespace Crypto
    {
        return OBJ_nid2sn(groupId());
    }
-
-
-   inline void
-   ECKeyImpl::save(const std::string & publicKeyFile, const std::string & privateKeyFile, const std::string & privateKeyPassphrase) const
-   {
-       EVPPKey(_pEC).save(publicKeyFile, privateKeyFile, privateKeyPassphrase);
-   }
-
-
-   inline void
-   ECKeyImpl::save(std::ostream * pPublicKeyStream, std::ostream * pPrivateKeyStream, const std::string & privateKeyPassphrase) const
-   {
-       EVPPKey(_pEC).save(pPublicKeyStream, pPrivateKeyStream, privateKeyPassphrase);
-   }
-
-
 }
 } // namespace Poco::Crypto
diff --git a/base/poco/Crypto/include/Poco/Crypto/KeyPair.h b/base/poco/Crypto/include/Poco/Crypto/KeyPair.h
index 36adbec6a4d..291a0f8b749 100644
--- a/base/poco/Crypto/include/Poco/Crypto/KeyPair.h
+++ b/base/poco/Crypto/include/Poco/Crypto/KeyPair.h
@@ -56,24 +56,6 @@ namespace Crypto
        virtual int size() const;
        /// Returns the RSA modulus size.

-       virtual void save(
-           const std::string & publicKeyPairFile,
-           const std::string & privateKeyPairFile = "",
-           const std::string & privateKeyPairPassphrase = "") const;
-       /// Exports the public and private keys to the given files.
-       ///
-       /// If an empty filename is specified, the corresponding key
-       /// is not exported.
-
-       virtual void save(
-           std::ostream * pPublicKeyPairStream,
-           std::ostream * pPrivateKeyPairStream = 0,
-           const std::string & privateKeyPairPassphrase = "") const;
-       /// Exports the public and private key to the given streams.
-       ///
-       /// If a null pointer is passed for a stream, the corresponding
-       /// key is not exported.
-
        KeyPairImpl::Ptr impl() const;
        /// Returns the impl object.
@@ -97,21 +79,6 @@ namespace Crypto return _pImpl->size(); } - - inline void - KeyPair::save(const std::string & publicKeyFile, const std::string & privateKeyFile, const std::string & privateKeyPassphrase) const - { - _pImpl->save(publicKeyFile, privateKeyFile, privateKeyPassphrase); - } - - - inline void - KeyPair::save(std::ostream * pPublicKeyStream, std::ostream * pPrivateKeyStream, const std::string & privateKeyPassphrase) const - { - _pImpl->save(pPublicKeyStream, pPrivateKeyStream, privateKeyPassphrase); - } - - inline const std::string & KeyPair::name() const { return _pImpl->name(); diff --git a/base/poco/Crypto/include/Poco/Crypto/KeyPairImpl.h b/base/poco/Crypto/include/Poco/Crypto/KeyPairImpl.h index 155efd20b9c..ecafbef0241 100644 --- a/base/poco/Crypto/include/Poco/Crypto/KeyPairImpl.h +++ b/base/poco/Crypto/include/Poco/Crypto/KeyPairImpl.h @@ -55,22 +55,6 @@ namespace Crypto virtual int size() const = 0; /// Returns the key size. - virtual void save( - const std::string & publicKeyFile, - const std::string & privateKeyFile = "", - const std::string & privateKeyPassphrase = "") const = 0; - /// Exports the public and private keys to the given files. - /// - /// If an empty filename is specified, the corresponding key - /// is not exported. - - virtual void save( - std::ostream * pPublicKeyStream, std::ostream * pPrivateKeyStream = 0, const std::string & privateKeyPassphrase = "") const = 0; - /// Exports the public and private key to the given streams. - /// - /// If a null pointer is passed for a stream, the corresponding - /// key is not exported. - const std::string & name() const; /// Returns key pair name diff --git a/base/poco/Crypto/include/Poco/Crypto/RSAKeyImpl.h b/base/poco/Crypto/include/Poco/Crypto/RSAKeyImpl.h index 4ccbb324c06..010c68bacd7 100644 --- a/base/poco/Crypto/include/Poco/Crypto/RSAKeyImpl.h +++ b/base/poco/Crypto/include/Poco/Crypto/RSAKeyImpl.h @@ -96,20 +96,6 @@ namespace Crypto ByteVec decryptionExponent() const; /// Returns the RSA decryption exponent. - void save(const std::string & publicKeyFile, const std::string & privateKeyFile = "", const std::string & privateKeyPassphrase = "") - const; - /// Exports the public and private keys to the given files. - /// - /// If an empty filename is specified, the corresponding key - /// is not exported. - - void - save(std::ostream * pPublicKeyStream, std::ostream * pPrivateKeyStream = 0, const std::string & privateKeyPassphrase = "") const; - /// Exports the public and private key to the given streams. - /// - /// If a null pointer is passed for a stream, the corresponding - /// key is not exported. 
- private: RSAKeyImpl(); @@ -139,4 +125,4 @@ namespace Crypto } // namespace Poco::Crypto -#endif // Crypto_RSAKeyImplImpl_INCLUDED \ No newline at end of file +#endif // Crypto_RSAKeyImplImpl_INCLUDED diff --git a/base/poco/Crypto/src/RSAKeyImpl.cpp b/base/poco/Crypto/src/RSAKeyImpl.cpp index eb6e758343a..229a3bce828 100644 --- a/base/poco/Crypto/src/RSAKeyImpl.cpp +++ b/base/poco/Crypto/src/RSAKeyImpl.cpp @@ -269,103 +269,6 @@ RSAKeyImpl::ByteVec RSAKeyImpl::decryptionExponent() const } -void RSAKeyImpl::save(const std::string& publicKeyFile, - const std::string& privateKeyFile, - const std::string& privateKeyPassphrase) const -{ - if (!publicKeyFile.empty()) - { - BIO* bio = BIO_new(BIO_s_file()); - if (!bio) throw Poco::IOException("Cannot create BIO for writing public key file", publicKeyFile); - try - { - if (BIO_write_filename(bio, const_cast(publicKeyFile.c_str()))) - { - if (!PEM_write_bio_RSAPublicKey(bio, _pRSA)) - throw Poco::WriteFileException("Failed to write public key to file", publicKeyFile); - } - else throw Poco::CreateFileException("Cannot create public key file"); - } - catch (...) - { - BIO_free(bio); - throw; - } - BIO_free(bio); - } - - if (!privateKeyFile.empty()) - { - BIO* bio = BIO_new(BIO_s_file()); - if (!bio) throw Poco::IOException("Cannot create BIO for writing private key file", privateKeyFile); - try - { - if (BIO_write_filename(bio, const_cast(privateKeyFile.c_str()))) - { - int rc = 0; - if (privateKeyPassphrase.empty()) - rc = PEM_write_bio_RSAPrivateKey(bio, _pRSA, 0, 0, 0, 0, 0); - else - rc = PEM_write_bio_RSAPrivateKey(bio, _pRSA, EVP_des_ede3_cbc(), - reinterpret_cast(const_cast(privateKeyPassphrase.c_str())), - static_cast(privateKeyPassphrase.length()), 0, 0); - if (!rc) throw Poco::FileException("Failed to write private key to file", privateKeyFile); - } - else throw Poco::CreateFileException("Cannot create private key file", privateKeyFile); - } - catch (...) 
- { - BIO_free(bio); - throw; - } - BIO_free(bio); - } -} - - -void RSAKeyImpl::save(std::ostream* pPublicKeyStream, - std::ostream* pPrivateKeyStream, - const std::string& privateKeyPassphrase) const -{ - if (pPublicKeyStream) - { - BIO* bio = BIO_new(BIO_s_mem()); - if (!bio) throw Poco::IOException("Cannot create BIO for writing public key"); - if (!PEM_write_bio_RSAPublicKey(bio, _pRSA)) - { - BIO_free(bio); - throw Poco::WriteFileException("Failed to write public key to stream"); - } - char* pData; - long size = BIO_get_mem_data(bio, &pData); - pPublicKeyStream->write(pData, static_cast(size)); - BIO_free(bio); - } - - if (pPrivateKeyStream) - { - BIO* bio = BIO_new(BIO_s_mem()); - if (!bio) throw Poco::IOException("Cannot create BIO for writing public key"); - int rc = 0; - if (privateKeyPassphrase.empty()) - rc = PEM_write_bio_RSAPrivateKey(bio, _pRSA, 0, 0, 0, 0, 0); - else - rc = PEM_write_bio_RSAPrivateKey(bio, _pRSA, EVP_des_ede3_cbc(), - reinterpret_cast(const_cast(privateKeyPassphrase.c_str())), - static_cast(privateKeyPassphrase.length()), 0, 0); - if (!rc) - { - BIO_free(bio); - throw Poco::FileException("Failed to write private key to stream"); - } - char* pData; - long size = BIO_get_mem_data(bio, &pData); - pPrivateKeyStream->write(pData, static_cast(size)); - BIO_free(bio); - } -} - - RSAKeyImpl::ByteVec RSAKeyImpl::convertToByteVec(const BIGNUM* bn) { int numBytes = BN_num_bytes(bn); @@ -383,4 +286,4 @@ RSAKeyImpl::ByteVec RSAKeyImpl::convertToByteVec(const BIGNUM* bn) } -} } // namespace Poco::Crypto \ No newline at end of file +} } // namespace Poco::Crypto diff --git a/base/poco/Data/ODBC/include/Poco/Data/ODBC/Unicode_WIN32.h b/base/poco/Data/ODBC/include/Poco/Data/ODBC/Unicode_WIN32.h deleted file mode 100644 index 06af853e443..00000000000 --- a/base/poco/Data/ODBC/include/Poco/Data/ODBC/Unicode_WIN32.h +++ /dev/null @@ -1,62 +0,0 @@ -// -// Unicode.h -// -// Library: Data/ODBC -// Package: ODBC -// Module: Unicode -// -// Definition of Unicode_WIN32. -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Data_ODBC_Unicode_WIN32_INCLUDED -#define Data_ODBC_Unicode_WIN32_INCLUDED - - -namespace Poco -{ -namespace Data -{ - namespace ODBC - { - - - inline void makeUTF16(SQLCHAR * pSQLChar, SQLINTEGER length, std::wstring & target) - /// Utility function for conversion from UTF-8 to UTF-16 - { - int len = length; - if (SQL_NTS == len) - len = (int)std::strlen((const char *)pSQLChar); - - UnicodeConverter::toUTF16((const char *)pSQLChar, len, target); - } - - - inline void makeUTF8(Poco::Buffer & buffer, SQLINTEGER length, SQLPOINTER pTarget, SQLINTEGER targetLength) - /// Utility function for conversion from UTF-16 to UTF-8. Length is in bytes. - { - if (buffer.sizeBytes() < length) - throw InvalidArgumentException("Specified length exceeds available length."); - else if ((length % 2) != 0) - throw InvalidArgumentException("Length must be an even number."); - - length /= sizeof(wchar_t); - std::string result; - UnicodeConverter::toUTF8(buffer.begin(), length, result); - - std::memset(pTarget, 0, targetLength); - std::strncpy((char *)pTarget, result.c_str(), result.size() < targetLength ? 
result.size() : targetLength); - } - - - } -} -} // namespace Poco::Data::ODBC - - -#endif // Data_ODBC_Unicode_WIN32_INCLUDED diff --git a/base/poco/Data/ODBC/src/Unicode_WIN32.cpp b/base/poco/Data/ODBC/src/Unicode_WIN32.cpp deleted file mode 100644 index fe637e49b3d..00000000000 --- a/base/poco/Data/ODBC/src/Unicode_WIN32.cpp +++ /dev/null @@ -1,761 +0,0 @@ -// -// Unicode.cpp -// -// Library: Data/ODBC -// Package: ODBC -// Module: Unicode -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Data/ODBC/ODBC.h" -#include "Poco/Data/ODBC/Utility.h" -#include "Poco/Data/ODBC/Unicode_WIN32.h" -#include "Poco/Buffer.h" -#include "Poco/Exception.h" - - -using Poco::Buffer; -using Poco::InvalidArgumentException; -using Poco::NotImplementedException; - - -namespace Poco { -namespace Data { -namespace ODBC { - - -SQLRETURN SQLColAttribute(SQLHSTMT hstmt, - SQLUSMALLINT iCol, - SQLUSMALLINT iField, - SQLPOINTER pCharAttr, - SQLSMALLINT cbCharAttrMax, - SQLSMALLINT* pcbCharAttr, - NumAttrPtrType pNumAttr) -{ - if (isString(pCharAttr, cbCharAttrMax)) - { - Buffer buffer(stringLength(pCharAttr, cbCharAttrMax)); - - SQLRETURN rc = SQLColAttributeW(hstmt, - iCol, - iField, - buffer.begin(), - (SQLSMALLINT) buffer.sizeBytes(), - pcbCharAttr, - pNumAttr); - - makeUTF8(buffer, *pcbCharAttr, pCharAttr, cbCharAttrMax); - return rc; - } - - return SQLColAttributeW(hstmt, - iCol, - iField, - pCharAttr, - cbCharAttrMax, - pcbCharAttr, - pNumAttr); -} - - -SQLRETURN SQLColAttributes(SQLHSTMT hstmt, - SQLUSMALLINT icol, - SQLUSMALLINT fDescType, - SQLPOINTER rgbDesc, - SQLSMALLINT cbDescMax, - SQLSMALLINT* pcbDesc, - SQLLEN* pfDesc) -{ - return SQLColAttribute(hstmt, - icol, - fDescType, - rgbDesc, - cbDescMax, - pcbDesc, - pfDesc); -} - - -SQLRETURN SQLConnect(SQLHDBC hdbc, - SQLCHAR* szDSN, - SQLSMALLINT cbDSN, - SQLCHAR* szUID, - SQLSMALLINT cbUID, - SQLCHAR* szAuthStr, - SQLSMALLINT cbAuthStr) -{ - std::wstring sqlDSN; - makeUTF16(szDSN, cbDSN, sqlDSN); - - std::wstring sqlUID; - makeUTF16(szUID, cbUID, sqlUID); - - std::wstring sqlPWD; - makeUTF16(szAuthStr, cbAuthStr, sqlPWD); - - return SQLConnectW(hdbc, - (SQLWCHAR*) sqlDSN.c_str(), - (SQLSMALLINT) sqlDSN.size(), - (SQLWCHAR*) sqlUID.c_str(), - (SQLSMALLINT) sqlUID.size(), - (SQLWCHAR*) sqlPWD.c_str(), - (SQLSMALLINT) sqlPWD.size()); -} - - -SQLRETURN SQLDescribeCol(SQLHSTMT hstmt, - SQLUSMALLINT icol, - SQLCHAR* szColName, - SQLSMALLINT cbColNameMax, - SQLSMALLINT* pcbColName, - SQLSMALLINT* pfSqlType, - SQLULEN* pcbColDef, - SQLSMALLINT* pibScale, - SQLSMALLINT* pfNullable) -{ - Buffer buffer(cbColNameMax); - SQLRETURN rc = SQLDescribeColW(hstmt, - icol, - (SQLWCHAR*) buffer.begin(), - (SQLSMALLINT) buffer.size(), - pcbColName, - pfSqlType, - pcbColDef, - pibScale, - pfNullable); - - makeUTF8(buffer, *pcbColName * sizeof(wchar_t), szColName, cbColNameMax); - return rc; -} - - -SQLRETURN SQLError(SQLHENV henv, - SQLHDBC hdbc, - SQLHSTMT hstmt, - SQLCHAR* szSqlState, - SQLINTEGER* pfNativeError, - SQLCHAR* szErrorMsg, - SQLSMALLINT cbErrorMsgMax, - SQLSMALLINT* pcbErrorMsg) -{ - throw NotImplementedException("SQLError is obsolete. 
" - "Use SQLGetDiagRec instead."); -} - - -SQLRETURN SQLExecDirect(SQLHSTMT hstmt, - SQLCHAR* szSqlStr, - SQLINTEGER cbSqlStr) -{ - std::wstring sqlStr; - makeUTF16(szSqlStr, cbSqlStr, sqlStr); - - return SQLExecDirectW(hstmt, - (SQLWCHAR*) sqlStr.c_str(), - (SQLINTEGER) sqlStr.size()); -} - - -SQLRETURN SQLGetConnectAttr(SQLHDBC hdbc, - SQLINTEGER fAttribute, - SQLPOINTER rgbValue, - SQLINTEGER cbValueMax, - SQLINTEGER* pcbValue) -{ - if (isString(rgbValue, cbValueMax)) - { - Buffer buffer(stringLength(rgbValue, cbValueMax)); - - SQLRETURN rc = SQLGetConnectAttrW(hdbc, - fAttribute, - buffer.begin(), - (SQLINTEGER) buffer.sizeBytes(), - pcbValue); - - makeUTF8(buffer, *pcbValue, rgbValue, cbValueMax); - return rc; - } - - - return SQLGetConnectAttrW(hdbc, - fAttribute, - rgbValue, - cbValueMax, - pcbValue); -} - - -SQLRETURN SQLGetCursorName(SQLHSTMT hstmt, - SQLCHAR* szCursor, - SQLSMALLINT cbCursorMax, - SQLSMALLINT* pcbCursor) -{ - throw NotImplementedException("Not implemented"); -} - - -SQLRETURN SQLSetDescField(SQLHDESC hdesc, - SQLSMALLINT iRecord, - SQLSMALLINT iField, - SQLPOINTER rgbValue, - SQLINTEGER cbValueMax) -{ - if (isString(rgbValue, cbValueMax)) - { - std::wstring str; - makeUTF16((SQLCHAR*) rgbValue, cbValueMax, str); - - SQLRETURN rc = SQLSetDescFieldW(hdesc, - iRecord, - iField, - (SQLPOINTER) str.c_str(), - (SQLINTEGER) str.size() * sizeof(std::wstring::value_type)); - - return rc; - } - - return SQLSetDescFieldW(hdesc, - iRecord, - iField, - rgbValue, - cbValueMax); -} - - -SQLRETURN SQLGetDescField(SQLHDESC hdesc, - SQLSMALLINT iRecord, - SQLSMALLINT iField, - SQLPOINTER rgbValue, - SQLINTEGER cbValueMax, - SQLINTEGER* pcbValue) -{ - if (isString(rgbValue, cbValueMax)) - { - Buffer buffer(stringLength(rgbValue, cbValueMax)); - - SQLRETURN rc = SQLGetDescFieldW(hdesc, - iRecord, - iField, - buffer.begin(), - (SQLINTEGER) buffer.sizeBytes(), - pcbValue); - - makeUTF8(buffer, *pcbValue, rgbValue, cbValueMax); - return rc; - } - - return SQLGetDescFieldW(hdesc, - iRecord, - iField, - rgbValue, - cbValueMax, - pcbValue); -} - - -SQLRETURN SQLGetDescRec(SQLHDESC hdesc, - SQLSMALLINT iRecord, - SQLCHAR* szName, - SQLSMALLINT cbNameMax, - SQLSMALLINT* pcbName, - SQLSMALLINT* pfType, - SQLSMALLINT* pfSubType, - SQLLEN* pLength, - SQLSMALLINT* pPrecision, - SQLSMALLINT* pScale, - SQLSMALLINT* pNullable) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLGetDiagField(SQLSMALLINT fHandleType, - SQLHANDLE handle, - SQLSMALLINT iRecord, - SQLSMALLINT fDiagField, - SQLPOINTER rgbDiagInfo, - SQLSMALLINT cbDiagInfoMax, - SQLSMALLINT* pcbDiagInfo) -{ - if (isString(rgbDiagInfo, cbDiagInfoMax)) - { - Buffer buffer(stringLength(rgbDiagInfo, cbDiagInfoMax)); - - SQLRETURN rc = SQLGetDiagFieldW(fHandleType, - handle, - iRecord, - fDiagField, - buffer.begin(), - (SQLSMALLINT) buffer.sizeBytes(), - pcbDiagInfo); - - makeUTF8(buffer, *pcbDiagInfo, rgbDiagInfo, cbDiagInfoMax); - return rc; - } - - return SQLGetDiagFieldW(fHandleType, - handle, - iRecord, - fDiagField, - rgbDiagInfo, - cbDiagInfoMax, - pcbDiagInfo); -} - - -SQLRETURN SQLGetDiagRec(SQLSMALLINT fHandleType, - SQLHANDLE handle, - SQLSMALLINT iRecord, - SQLCHAR* szSqlState, - SQLINTEGER* pfNativeError, - SQLCHAR* szErrorMsg, - SQLSMALLINT cbErrorMsgMax, - SQLSMALLINT* pcbErrorMsg) -{ - const SQLINTEGER stateLen = SQL_SQLSTATE_SIZE + 1; - Buffer bufState(stateLen); - Buffer bufErr(cbErrorMsgMax); - - SQLRETURN rc = SQLGetDiagRecW(fHandleType, - handle, - iRecord, - bufState.begin(), - pfNativeError, - bufErr.begin(), - 
(SQLSMALLINT) bufErr.size(), - pcbErrorMsg); - - makeUTF8(bufState, stateLen * sizeof(wchar_t), szSqlState, stateLen); - makeUTF8(bufErr, *pcbErrorMsg * sizeof(wchar_t), szErrorMsg, cbErrorMsgMax); - - return rc; -} - - -SQLRETURN SQLPrepare(SQLHSTMT hstmt, - SQLCHAR* szSqlStr, - SQLINTEGER cbSqlStr) -{ - std::wstring sqlStr; - makeUTF16(szSqlStr, cbSqlStr, sqlStr); - - return SQLPrepareW(hstmt, - (SQLWCHAR*) sqlStr.c_str(), - (SQLINTEGER) sqlStr.size()); -} - - -SQLRETURN SQLSetConnectAttr(SQLHDBC hdbc, - SQLINTEGER fAttribute, - SQLPOINTER rgbValue, - SQLINTEGER cbValue) -{ - if (isString(rgbValue, cbValue)) - { - std::wstring str; - makeUTF16((SQLCHAR*) rgbValue, cbValue, str); - - return SQLSetConnectAttrW(hdbc, - fAttribute, - (SQLWCHAR*) str.c_str(), - (SQLINTEGER) str.size() * sizeof(std::wstring::value_type)); - } - - return SQLSetConnectAttrW(hdbc, - fAttribute, - rgbValue, - cbValue); -} - - -SQLRETURN SQLSetCursorName(SQLHSTMT hstmt, - SQLCHAR* szCursor, - SQLSMALLINT cbCursor) -{ - throw NotImplementedException("Not implemented"); -} - - -SQLRETURN SQLSetStmtAttr(SQLHSTMT hstmt, - SQLINTEGER fAttribute, - SQLPOINTER rgbValue, - SQLINTEGER cbValueMax) -{ - if (isString(rgbValue, cbValueMax)) - { - std::wstring str; - makeUTF16((SQLCHAR*) rgbValue, cbValueMax, str); - - return SQLSetStmtAttrW(hstmt, - fAttribute, - (SQLPOINTER) str.c_str(), - (SQLINTEGER) str.size()); - } - - return SQLSetStmtAttrW(hstmt, - fAttribute, - rgbValue, - cbValueMax); -} - - -SQLRETURN SQLGetStmtAttr(SQLHSTMT hstmt, - SQLINTEGER fAttribute, - SQLPOINTER rgbValue, - SQLINTEGER cbValueMax, - SQLINTEGER* pcbValue) -{ - if (isString(rgbValue, cbValueMax)) - { - Buffer buffer(stringLength(rgbValue, cbValueMax)); - - return SQLGetStmtAttrW(hstmt, - fAttribute, - (SQLPOINTER) buffer.begin(), - (SQLINTEGER) buffer.sizeBytes(), - pcbValue); - } - - return SQLGetStmtAttrW(hstmt, - fAttribute, - rgbValue, - cbValueMax, - pcbValue); -} - - -SQLRETURN SQLColumns(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szTableName, - SQLSMALLINT cbTableName, - SQLCHAR* szColumnName, - SQLSMALLINT cbColumnName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLGetConnectOption(SQLHDBC hdbc, - SQLUSMALLINT fOption, - SQLPOINTER pvParam) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLGetInfo(SQLHDBC hdbc, - SQLUSMALLINT fInfoType, - SQLPOINTER rgbInfoValue, - SQLSMALLINT cbInfoValueMax, - SQLSMALLINT* pcbInfoValue) -{ - if (cbInfoValueMax) - { - Buffer buffer(cbInfoValueMax); - - SQLRETURN rc = SQLGetInfoW(hdbc, - fInfoType, - (SQLPOINTER) buffer.begin(), - (SQLSMALLINT) buffer.sizeBytes(), - pcbInfoValue); - - makeUTF8(buffer, *pcbInfoValue, rgbInfoValue, cbInfoValueMax); - - return rc; - } - - return SQLGetInfoW(hdbc, - fInfoType, - rgbInfoValue, - cbInfoValueMax, - pcbInfoValue); -} - - -SQLRETURN SQLGetTypeInfo(SQLHSTMT StatementHandle, SQLSMALLINT DataType) -{ - return SQLGetTypeInfoW(StatementHandle, DataType); -} - - -SQLRETURN SQLSetConnectOption(SQLHDBC hdbc, - SQLUSMALLINT fOption, - SQLULEN vParam) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLSpecialColumns(SQLHSTMT hstmt, - SQLUSMALLINT fColType, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szTableName, - SQLSMALLINT cbTableName, - SQLUSMALLINT fScope, - SQLUSMALLINT fNullable) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLStatistics(SQLHSTMT hstmt, - SQLCHAR* 
szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szTableName, - SQLSMALLINT cbTableName, - SQLUSMALLINT fUnique, - SQLUSMALLINT fAccuracy) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLTables(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szTableName, - SQLSMALLINT cbTableName, - SQLCHAR* szTableType, - SQLSMALLINT cbTableType) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLDataSources(SQLHENV henv, - SQLUSMALLINT fDirection, - SQLCHAR* szDSN, - SQLSMALLINT cbDSNMax, - SQLSMALLINT* pcbDSN, - SQLCHAR* szDesc, - SQLSMALLINT cbDescMax, - SQLSMALLINT* pcbDesc) -{ - Buffer bufDSN(cbDSNMax); - Buffer bufDesc(cbDescMax); - - SQLRETURN rc = SQLDataSourcesW(henv, - fDirection, - bufDSN.begin(), - (SQLSMALLINT) bufDSN.size(), - pcbDSN, - bufDesc.begin(), - (SQLSMALLINT) bufDesc.size(), - pcbDesc); - - makeUTF8(bufDSN, *pcbDSN * sizeof(wchar_t), szDSN, cbDSNMax); - makeUTF8(bufDesc, *pcbDesc * sizeof(wchar_t), szDesc, cbDescMax); - - return rc; -} - - -SQLRETURN SQLDriverConnect(SQLHDBC hdbc, - SQLHWND hwnd, - SQLCHAR* szConnStrIn, - SQLSMALLINT cbConnStrIn, - SQLCHAR* szConnStrOut, - SQLSMALLINT cbConnStrOutMax, - SQLSMALLINT* pcbConnStrOut, - SQLUSMALLINT fDriverCompletion) -{ - std::wstring connStrIn; - int len = cbConnStrIn; - if (SQL_NTS == len) - len = (int) std::strlen((const char*) szConnStrIn); - - Poco::UnicodeConverter::toUTF16((const char *) szConnStrIn, len, connStrIn); - - Buffer bufOut(cbConnStrOutMax); - SQLRETURN rc = SQLDriverConnectW(hdbc, - hwnd, - (SQLWCHAR*) connStrIn.c_str(), - (SQLSMALLINT) connStrIn.size(), - bufOut.begin(), - (SQLSMALLINT) bufOut.size(), - pcbConnStrOut, - fDriverCompletion); - - if (!Utility::isError(rc)) - makeUTF8(bufOut, *pcbConnStrOut * sizeof(wchar_t), szConnStrOut, cbConnStrOutMax); - - return rc; -} - - -SQLRETURN SQLBrowseConnect(SQLHDBC hdbc, - SQLCHAR* szConnStrIn, - SQLSMALLINT cbConnStrIn, - SQLCHAR* szConnStrOut, - SQLSMALLINT cbConnStrOutMax, - SQLSMALLINT* pcbConnStrOut) -{ - std::wstring str; - makeUTF16(szConnStrIn, cbConnStrIn, str); - - Buffer bufConnStrOut(cbConnStrOutMax); - - SQLRETURN rc = SQLBrowseConnectW(hdbc, - (SQLWCHAR*) str.c_str(), - (SQLSMALLINT) str.size(), - bufConnStrOut.begin(), - (SQLSMALLINT) bufConnStrOut.size(), - pcbConnStrOut); - - makeUTF8(bufConnStrOut, *pcbConnStrOut * sizeof(wchar_t), szConnStrOut, cbConnStrOutMax); - - return rc; -} - - -SQLRETURN SQLColumnPrivileges(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szTableName, - SQLSMALLINT cbTableName, - SQLCHAR* szColumnName, - SQLSMALLINT cbColumnName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLForeignKeys(SQLHSTMT hstmt, - SQLCHAR* szPkCatalogName, - SQLSMALLINT cbPkCatalogName, - SQLCHAR* szPkSchemaName, - SQLSMALLINT cbPkSchemaName, - SQLCHAR* szPkTableName, - SQLSMALLINT cbPkTableName, - SQLCHAR* szFkCatalogName, - SQLSMALLINT cbFkCatalogName, - SQLCHAR* szFkSchemaName, - SQLSMALLINT cbFkSchemaName, - SQLCHAR* szFkTableName, - SQLSMALLINT cbFkTableName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLNativeSql(SQLHDBC hdbc, - SQLCHAR* szSqlStrIn, - SQLINTEGER cbSqlStrIn, - SQLCHAR* szSqlStr, - SQLINTEGER cbSqlStrMax, - SQLINTEGER* pcbSqlStr) -{ - std::wstring str; - makeUTF16(szSqlStrIn, cbSqlStrIn, str); - - Buffer bufSQLOut(cbSqlStrMax); - - SQLRETURN rc = 
SQLNativeSqlW(hdbc, - (SQLWCHAR*) str.c_str(), - (SQLINTEGER) str.size(), - bufSQLOut.begin(), - (SQLINTEGER) bufSQLOut.size(), - pcbSqlStr); - - makeUTF8(bufSQLOut, *pcbSqlStr * sizeof(wchar_t), szSqlStr, cbSqlStrMax); - - return rc; -} - - -SQLRETURN SQLPrimaryKeys(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szTableName, - SQLSMALLINT cbTableName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLProcedureColumns(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szProcName, - SQLSMALLINT cbProcName, - SQLCHAR* szColumnName, - SQLSMALLINT cbColumnName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLProcedures(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szProcName, - SQLSMALLINT cbProcName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLTablePrivileges(SQLHSTMT hstmt, - SQLCHAR* szCatalogName, - SQLSMALLINT cbCatalogName, - SQLCHAR* szSchemaName, - SQLSMALLINT cbSchemaName, - SQLCHAR* szTableName, - SQLSMALLINT cbTableName) -{ - throw NotImplementedException(); -} - - -SQLRETURN SQLDrivers(SQLHENV henv, - SQLUSMALLINT fDirection, - SQLCHAR* szDriverDesc, - SQLSMALLINT cbDriverDescMax, - SQLSMALLINT* pcbDriverDesc, - SQLCHAR* szDriverAttributes, - SQLSMALLINT cbDrvrAttrMax, - SQLSMALLINT* pcbDrvrAttr) -{ - Buffer bufDriverDesc(cbDriverDescMax); - Buffer bufDriverAttr(cbDrvrAttrMax); - - SQLRETURN rc = SQLDriversW(henv, - fDirection, - bufDriverDesc.begin(), - (SQLSMALLINT) bufDriverDesc.size(), - pcbDriverDesc, - bufDriverAttr.begin(), - (SQLSMALLINT) bufDriverAttr.size(), - pcbDrvrAttr); - - makeUTF8(bufDriverDesc, *pcbDriverDesc * sizeof(wchar_t), szDriverDesc, cbDriverDescMax); - makeUTF8(bufDriverAttr, *pcbDrvrAttr * sizeof(wchar_t), szDriverAttributes, cbDrvrAttrMax); - - return rc; -} - - -} } } // namespace Poco::Data::ODBC diff --git a/base/poco/Data/include/Poco/Data/AutoTransaction.h b/base/poco/Data/include/Poco/Data/AutoTransaction.h deleted file mode 100644 index a222bd27afe..00000000000 --- a/base/poco/Data/include/Poco/Data/AutoTransaction.h +++ /dev/null @@ -1,37 +0,0 @@ -// -// AutoTransaction.h -// -// Library: Data -// Package: DataCore -// Module: AutoTransaction -// -// Forward header for the Transaction class. -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Data_AutoTransaction_INCLUDED -#define Data_AutoTransaction_INCLUDED - - -#include "Poco/Data/Transaction.h" - - -namespace Poco -{ -namespace Data -{ - - - typedef Transaction AutoTransaction; - - -} -} // namespace Poco::Data - - -#endif // Data_AutoTransaction_INCLUDED diff --git a/base/poco/Data/include/Poco/Data/DynamicLOB.h b/base/poco/Data/include/Poco/Data/DynamicLOB.h deleted file mode 100644 index 749b269ffac..00000000000 --- a/base/poco/Data/include/Poco/Data/DynamicLOB.h +++ /dev/null @@ -1,54 +0,0 @@ -// -// DynamicLOB.h -// -// Library: Data -// Package: DataCore -// Module: DynamicLOB -// -// Definition of the Poco::Dynamic::Var LOB cast operators. -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Data_DynamicLOB_INCLUDED -#define Data_DynamicLOB_INCLUDED - - -#include "Poco/Data/Data.h" -#include "Poco/Data/LOB.h" -#include "Poco/Dynamic/Var.h" - - -namespace Poco -{ -namespace Data -{ - - template - class LOB; - typedef LOB BLOB; - typedef LOB CLOB; - -} -} // namespace Poco::Data - - -namespace Poco -{ -namespace Dynamic -{ - - template <> - Data_API Var::operator Poco::Data::CLOB() const; - template <> - Data_API Var::operator Poco::Data::BLOB() const; - -} -} // namespace Poco::Dynamic - - -#endif // Data_DynamicLOB_INCLUDED diff --git a/base/poco/Data/include/Poco/Data/LOBStream.h b/base/poco/Data/include/Poco/Data/LOBStream.h deleted file mode 100644 index 23346224c0f..00000000000 --- a/base/poco/Data/include/Poco/Data/LOBStream.h +++ /dev/null @@ -1,149 +0,0 @@ -// -// LOBStream.h -// -// Library: Data -// Package: DataCore -// Module: LOBStream -// -// Definition of the LOBStream class. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Data_LOBStream_INCLUDED -#define Data_LOBStream_INCLUDED - - -#include -#include -#include "Poco/Data/LOB.h" -#include "Poco/Foundation.h" -#include "Poco/UnbufferedStreamBuf.h" - - -namespace Poco -{ -namespace Data -{ - - - template - class LOBStreamBuf : public BasicUnbufferedStreamBuf> - /// This is the streambuf class used for reading from and writing to a LOB. - { - public: - LOBStreamBuf(LOB & lob) : _lob(lob), _it(_lob.begin()) - /// Creates LOBStreamBuf. - { - } - - - ~LOBStreamBuf() - /// Destroys LOBStreamBuf. - { - } - - protected: - typedef std::char_traits TraitsType; - typedef BasicUnbufferedStreamBuf BaseType; - - typename BaseType::int_type readFromDevice() - { - if (_it != _lob.end()) - return BaseType::charToInt(*_it++); - else - return -1; - } - - typename BaseType::int_type writeToDevice(T c) - { - _lob.appendRaw(&c, 1); - return 1; - } - - private: - LOB & _lob; - typename LOB::Iterator _it; - }; - - - template - class LOBIOS : public virtual std::ios - /// The base class for LOBInputStream and - /// LOBOutputStream. - /// - /// This class is needed to ensure the correct initialization - /// order of the stream buffer and base classes. - { - public: - LOBIOS(LOB & lob, openmode mode) : _buf(lob) - /// Creates the LOBIOS with the given LOB. - { - poco_ios_init(&_buf); - } - - ~LOBIOS() - /// Destroys the LOBIOS. - { - } - - LOBStreamBuf * rdbuf() - /// Returns a pointer to the internal LOBStreamBuf. - { - return &_buf; - } - - protected: - LOBStreamBuf _buf; - }; - - - template - class LOBOutputStream : public LOBIOS, public std::basic_ostream> - /// An output stream for writing to a LOB. - { - public: - LOBOutputStream(LOB & lob) : LOBIOS(lob, std::ios::out), std::ostream(LOBIOS::rdbuf()) - /// Creates the LOBOutputStream with the given LOB. - { - } - - ~LOBOutputStream() - /// Destroys the LOBOutputStream. - { - } - }; - - - template - class LOBInputStream : public LOBIOS, public std::basic_istream> - /// An input stream for reading from a LOB. - { - public: - LOBInputStream(LOB & lob) : LOBIOS(lob, std::ios::in), std::istream(LOBIOS::rdbuf()) - /// Creates the LOBInputStream with the given LOB. - { - } - - ~LOBInputStream() - /// Destroys the LOBInputStream. 
- { - } - }; - - - typedef LOBOutputStream BLOBOutputStream; - typedef LOBOutputStream CLOBOutputStream; - - typedef LOBInputStream BLOBInputStream; - typedef LOBInputStream CLOBInputStream; - -} -} // namespace Poco::Data - - -#endif // Data_LOBStream_INCLUDED diff --git a/base/poco/Data/src/DynamicLOB.cpp b/base/poco/Data/src/DynamicLOB.cpp deleted file mode 100644 index 5dfe3df8574..00000000000 --- a/base/poco/Data/src/DynamicLOB.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// -// DynamicLOB.cpp -// -// Library: Data -// Package: DataCore -// Module: DynamicLOB -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifdef __GNUC__ -// TODO: determine g++ version able to do the right thing without these specializations - -#include "Poco/Data/DynamicLOB.h" -#include "Poco/Data/LOB.h" -#include "Poco/Dynamic/Var.h" - - -namespace Poco { -namespace Dynamic { - - -using Poco::Data::CLOB; -using Poco::Data::BLOB; - - -template <> -Var::operator CLOB () const -{ - VarHolder* pHolder = content(); - - if (!pHolder) - throw InvalidAccessException("Can not convert empty value."); - - if (typeid(CLOB) == pHolder->type()) - return extract(); - else - { - std::string result; - pHolder->convert(result); - return CLOB(result); - } -} - - -template <> -Var::operator BLOB () const -{ - VarHolder* pHolder = content(); - - if (!pHolder) - throw InvalidAccessException("Can not convert empty value."); - - if (typeid(BLOB) == pHolder->type()) - return extract(); - else - { - std::string result; - pHolder->convert(result); - return BLOB(reinterpret_cast(result.data()), - result.size()); - } -} - - -} } // namespace Poco::Data - - -#endif // __GNUC__ - diff --git a/base/poco/Foundation/CMakeLists.txt b/base/poco/Foundation/CMakeLists.txt index dceb18e68cc..358f49ed055 100644 --- a/base/poco/Foundation/CMakeLists.txt +++ b/base/poco/Foundation/CMakeLists.txt @@ -31,8 +31,6 @@ set (SRCS src/ASCIIEncoding.cpp src/AsyncChannel.cpp src/AtomicCounter.cpp - src/Base32Decoder.cpp - src/Base32Encoder.cpp src/Base64Decoder.cpp src/Base64Encoder.cpp src/BinaryReader.cpp @@ -81,9 +79,6 @@ set (SRCS src/HexBinaryEncoder.cpp src/InflatingStream.cpp src/JSONString.cpp - src/Latin1Encoding.cpp - src/Latin2Encoding.cpp - src/Latin9Encoding.cpp src/LineEndingConverter.cpp src/LocalDateTime.cpp src/LogFile.cpp @@ -91,8 +86,6 @@ set (SRCS src/LoggingFactory.cpp src/LoggingRegistry.cpp src/LogStream.cpp - src/Manifest.cpp - src/MD4Engine.cpp src/MD5Engine.cpp src/MemoryPool.cpp src/MemoryStream.cpp @@ -113,7 +106,6 @@ set (SRCS src/PatternFormatter.cpp src/Pipe.cpp src/PipeImpl.cpp - src/PipeStream.cpp src/PriorityNotificationQueue.cpp src/Process.cpp src/PurgeStrategy.cpp @@ -136,10 +128,8 @@ set (SRCS src/StreamChannel.cpp src/StreamConverter.cpp src/StreamCopier.cpp - src/StreamTokenizer.cpp src/String.cpp src/StringTokenizer.cpp - src/SynchronizedObject.cpp src/SyslogChannel.cpp src/Task.cpp src/TaskManager.cpp @@ -175,9 +165,6 @@ set (SRCS src/VarHolder.cpp src/VarIterator.cpp src/Void.cpp - src/Windows1250Encoding.cpp - src/Windows1251Encoding.cpp - src/Windows1252Encoding.cpp ) add_library (_poco_foundation ${SRCS}) @@ -233,7 +220,8 @@ target_link_libraries (_poco_foundation PRIVATE Poco::Foundation::PCRE ch_contrib::zlib - ch_contrib::lz4) + ch_contrib::lz4 + ch_contrib::double_conversion) if(OS_DARWIN AND ARCH_AARCH64) target_compile_definitions (_poco_foundation diff --git a/base/poco/Foundation/include/Poco/Base32Decoder.h 
b/base/poco/Foundation/include/Poco/Base32Decoder.h deleted file mode 100644 index 96b6f013db8..00000000000 --- a/base/poco/Foundation/include/Poco/Base32Decoder.h +++ /dev/null @@ -1,105 +0,0 @@ -// -// Base32Decoder.h -// -// Library: Foundation -// Package: Streams -// Module: Base32 -// -// Definition of class Base32Decoder. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Base32Decoder_INCLUDED -#define Foundation_Base32Decoder_INCLUDED - - -#include -#include "Poco/Foundation.h" -#include "Poco/UnbufferedStreamBuf.h" - - -namespace Poco -{ - - -class Foundation_API Base32DecoderBuf : public UnbufferedStreamBuf -/// This streambuf base32-decodes all data read -/// from the istream connected to it. -/// -/// Note: For performance reasons, the characters -/// are read directly from the given istream's -/// underlying streambuf, so the state -/// of the istream will not reflect that of -/// its streambuf. -{ -public: - Base32DecoderBuf(std::istream & istr); - ~Base32DecoderBuf(); - -private: - int readFromDevice(); - int readOne(); - - unsigned char _group[8]; - int _groupLength; - int _groupIndex; - std::streambuf & _buf; - - static unsigned char IN_ENCODING[256]; - static bool IN_ENCODING_INIT; - -private: - Base32DecoderBuf(const Base32DecoderBuf &); - Base32DecoderBuf & operator=(const Base32DecoderBuf &); -}; - - -class Foundation_API Base32DecoderIOS : public virtual std::ios -/// The base class for Base32Decoder. -/// -/// This class is needed to ensure the correct initialization -/// order of the stream buffer and base classes. -{ -public: - Base32DecoderIOS(std::istream & istr); - ~Base32DecoderIOS(); - Base32DecoderBuf * rdbuf(); - -protected: - Base32DecoderBuf _buf; - -private: - Base32DecoderIOS(const Base32DecoderIOS &); - Base32DecoderIOS & operator=(const Base32DecoderIOS &); -}; - - -class Foundation_API Base32Decoder : public Base32DecoderIOS, public std::istream -/// This istream base32-decodes all data -/// read from the istream connected to it. -/// -/// Note: For performance reasons, the characters -/// are read directly from the given istream's -/// underlying streambuf, so the state -/// of the istream will not reflect that of -/// its streambuf. -{ -public: - Base32Decoder(std::istream & istr); - ~Base32Decoder(); - -private: - Base32Decoder(const Base32Decoder &); - Base32Decoder & operator=(const Base32Decoder &); -}; - - -} // namespace Poco - - -#endif // Foundation_Base32Decoder_INCLUDED diff --git a/base/poco/Foundation/include/Poco/Base32Encoder.h b/base/poco/Foundation/include/Poco/Base32Encoder.h deleted file mode 100644 index ced0dd6f3bb..00000000000 --- a/base/poco/Foundation/include/Poco/Base32Encoder.h +++ /dev/null @@ -1,111 +0,0 @@ -// -// Base32Encoder.h -// -// Library: Foundation -// Package: Streams -// Module: Base32 -// -// Definition of class Base32Encoder. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Base32Encoder_INCLUDED -#define Foundation_Base32Encoder_INCLUDED - - -#include -#include "Poco/Foundation.h" -#include "Poco/UnbufferedStreamBuf.h" - - -namespace Poco -{ - - -class Foundation_API Base32EncoderBuf : public UnbufferedStreamBuf -/// This streambuf base32-encodes all data written -/// to it and forwards it to a connected -/// ostream. 
-/// -/// Note: The characters are directly written -/// to the ostream's streambuf, thus bypassing -/// the ostream. The ostream's state is therefore -/// not updated to match the buffer's state. -{ -public: - Base32EncoderBuf(std::ostream & ostr, bool padding = true); - ~Base32EncoderBuf(); - - int close(); - /// Closes the stream buffer. - -private: - int writeToDevice(char c); - - unsigned char _group[5]; - int _groupLength; - std::streambuf & _buf; - bool _doPadding; - - static const unsigned char OUT_ENCODING[32]; - - friend class Base32DecoderBuf; - - Base32EncoderBuf(const Base32EncoderBuf &); - Base32EncoderBuf & operator=(const Base32EncoderBuf &); -}; - - -class Foundation_API Base32EncoderIOS : public virtual std::ios -/// The base class for Base32Encoder. -/// -/// This class is needed to ensure the correct initialization -/// order of the stream buffer and base classes. -{ -public: - Base32EncoderIOS(std::ostream & ostr, bool padding = true); - ~Base32EncoderIOS(); - int close(); - Base32EncoderBuf * rdbuf(); - -protected: - Base32EncoderBuf _buf; - -private: - Base32EncoderIOS(const Base32EncoderIOS &); - Base32EncoderIOS & operator=(const Base32EncoderIOS &); -}; - - -class Foundation_API Base32Encoder : public Base32EncoderIOS, public std::ostream -/// This ostream base32-encodes all data -/// written to it and forwards it to -/// a connected ostream. -/// Always call close() when done -/// writing data, to ensure proper -/// completion of the encoding operation. -/// -/// Note: The characters are directly written -/// to the ostream's streambuf, thus bypassing -/// the ostream. The ostream's state is therefore -/// not updated to match the buffer's state. -{ -public: - Base32Encoder(std::ostream & ostr, bool padding = true); - ~Base32Encoder(); - -private: - Base32Encoder(const Base32Encoder &); - Base32Encoder & operator=(const Base32Encoder &); -}; - - -} // namespace Poco - - -#endif // Foundation_Base32Encoder_INCLUDED diff --git a/base/poco/Foundation/include/Poco/ClassLibrary.h b/base/poco/Foundation/include/Poco/ClassLibrary.h deleted file mode 100644 index deb43f26297..00000000000 --- a/base/poco/Foundation/include/Poco/ClassLibrary.h +++ /dev/null @@ -1,92 +0,0 @@ -// -// ClassLibrary.h -// -// Library: Foundation -// Package: SharedLibrary -// Module: ClassLoader -// -// Definitions for class libraries. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_ClassLibrary_INCLUDED -#define Foundation_ClassLibrary_INCLUDED - - -#include -#include "Poco/Foundation.h" -#include "Poco/Manifest.h" - - -# define POCO_LIBRARY_API - - -// -// the entry points for every class library -// -extern "C" { -bool POCO_LIBRARY_API pocoBuildManifest(Poco::ManifestBase * pManifest); -void POCO_LIBRARY_API pocoInitializeLibrary(); -void POCO_LIBRARY_API pocoUninitializeLibrary(); -} - - -// -// additional support for named manifests -// -#define POCO_DECLARE_NAMED_MANIFEST(name) \ - extern "C" { \ - bool POCO_LIBRARY_API POCO_JOIN(pocoBuildManifest, name)(Poco::ManifestBase * pManifest); \ - } - - -// -// Macros to automatically implement pocoBuildManifest -// -// usage: -// -// POCO_BEGIN_MANIFEST(MyBaseClass) -// POCO_EXPORT_CLASS(MyFirstClass) -// POCO_EXPORT_CLASS(MySecondClass) -// ... 
-// POCO_END_MANIFEST -// -#define POCO_BEGIN_MANIFEST_IMPL(fnName, base) \ - bool fnName(Poco::ManifestBase * pManifest_) \ - { \ - typedef base _Base; \ - typedef Poco::Manifest<_Base> _Manifest; \ - std::string requiredType(typeid(_Manifest).name()); \ - std::string actualType(pManifest_->className()); \ - if (requiredType == actualType) \ - { \ - Poco::Manifest<_Base> * pManifest = static_cast<_Manifest *>(pManifest_); - - -#define POCO_BEGIN_MANIFEST(base) POCO_BEGIN_MANIFEST_IMPL(pocoBuildManifest, base) - - -#define POCO_BEGIN_NAMED_MANIFEST(name, base) \ - POCO_DECLARE_NAMED_MANIFEST(name) \ - POCO_BEGIN_MANIFEST_IMPL(POCO_JOIN(pocoBuildManifest, name), base) - - -#define POCO_END_MANIFEST \ - return true; \ - } \ - else return false; \ - } - - -#define POCO_EXPORT_CLASS(cls) pManifest->insert(new Poco::MetaObject(#cls)); - - -#define POCO_EXPORT_SINGLETON(cls) pManifest->insert(new Poco::MetaSingleton(#cls)); - - -#endif // Foundation_ClassLibrary_INCLUDED diff --git a/base/poco/Foundation/include/Poco/ClassLoader.h b/base/poco/Foundation/include/Poco/ClassLoader.h deleted file mode 100644 index 6752a6e7ecd..00000000000 --- a/base/poco/Foundation/include/Poco/ClassLoader.h +++ /dev/null @@ -1,355 +0,0 @@ -// -// ClassLoader.h -// -// Library: Foundation -// Package: SharedLibrary -// Module: ClassLoader -// -// Definition of the ClassLoader class. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_ClassLoader_INCLUDED -#define Foundation_ClassLoader_INCLUDED - - -#include -#include "Poco/Exception.h" -#include "Poco/Foundation.h" -#include "Poco/Manifest.h" -#include "Poco/MetaObject.h" -#include "Poco/Mutex.h" -#include "Poco/SharedLibrary.h" - - -namespace Poco -{ - - -template -class ClassLoader -/// The ClassLoader loads C++ classes from shared libraries -/// at runtime. It must be instantiated with a root class -/// of the loadable classes. -/// For a class to be loadable from a library, the library -/// must provide a Manifest of all the classes it contains. -/// The Manifest for a shared library can be easily built -/// with the help of the macros in the header file -/// "Foundation/ClassLibrary.h". -/// -/// Starting with POCO release 1.3, a class library can -/// export multiple manifests. In addition to the default -/// (unnamed) manifest, multiple named manifests can -/// be exported, each having a different base class. -/// -/// There is one important restriction: one instance of -/// ClassLoader can only load one manifest from a class -/// library. -{ -public: - typedef AbstractMetaObject Meta; - typedef Manifest Manif; - typedef void (*InitializeLibraryFunc)(); - typedef void (*UninitializeLibraryFunc)(); - typedef bool (*BuildManifestFunc)(ManifestBase *); - - struct LibraryInfo - { - SharedLibrary * pLibrary; - const Manif * pManifest; - int refCount; - }; - typedef std::map LibraryMap; - - class Iterator - /// The ClassLoader's very own iterator class. 
-    {
-    public:
-        typedef std::pair<std::string, const Manif *> Pair;
-
-        Iterator(const typename LibraryMap::const_iterator & it) { _it = it; }
-        Iterator(const Iterator & it) { _it = it._it; }
-        ~Iterator() { }
-        Iterator & operator=(const Iterator & it)
-        {
-            _it = it._it;
-            return *this;
-        }
-        inline bool operator==(const Iterator & it) const { return _it == it._it; }
-        inline bool operator!=(const Iterator & it) const { return _it != it._it; }
-        Iterator & operator++() // prefix
-        {
-            ++_it;
-            return *this;
-        }
-        Iterator operator++(int) // postfix
-        {
-            Iterator result(_it);
-            ++_it;
-            return result;
-        }
-        inline const Pair * operator*() const
-        {
-            _pair.first = _it->first;
-            _pair.second = _it->second.pManifest;
-            return &_pair;
-        }
-        inline const Pair * operator->() const
-        {
-            _pair.first = _it->first;
-            _pair.second = _it->second.pManifest;
-            return &_pair;
-        }
-
-    private:
-        typename LibraryMap::const_iterator _it;
-        mutable Pair _pair;
-    };
-
-    ClassLoader()
-    /// Creates the ClassLoader.
-    {
-    }
-
-    virtual ~ClassLoader()
-    /// Destroys the ClassLoader.
-    {
-        for (typename LibraryMap::const_iterator it = _map.begin(); it != _map.end(); ++it)
-        {
-            delete it->second.pLibrary;
-            delete it->second.pManifest;
-        }
-    }
-
-    void loadLibrary(const std::string & path, const std::string & manifest)
-    /// Loads a library from the given path, using the given manifest.
-    /// Does nothing if the library is already loaded.
-    /// Throws a LibraryLoadException if the library
-    /// cannot be loaded or does not have a Manifest.
-    /// If the library exports a function named "pocoInitializeLibrary",
-    /// this function is executed.
-    /// If called multiple times for the same library,
-    /// the number of calls to unloadLibrary() must be the same
-    /// for the library to become unloaded.
-    {
-        FastMutex::ScopedLock lock(_mutex);
-
-        typename LibraryMap::iterator it = _map.find(path);
-        if (it == _map.end())
-        {
-            LibraryInfo li;
-            li.pLibrary = 0;
-            li.pManifest = 0;
-            li.refCount = 1;
-            try
-            {
-                li.pLibrary = new SharedLibrary(path);
-                li.pManifest = new Manif();
-                std::string pocoBuildManifestSymbol("pocoBuildManifest");
-                pocoBuildManifestSymbol.append(manifest);
-                if (li.pLibrary->hasSymbol("pocoInitializeLibrary"))
-                {
-                    InitializeLibraryFunc initializeLibrary = (InitializeLibraryFunc)li.pLibrary->getSymbol("pocoInitializeLibrary");
-                    initializeLibrary();
-                }
-                if (li.pLibrary->hasSymbol(pocoBuildManifestSymbol))
-                {
-                    BuildManifestFunc buildManifest = (BuildManifestFunc)li.pLibrary->getSymbol(pocoBuildManifestSymbol);
-                    if (buildManifest(const_cast<Manif *>(li.pManifest)))
-                        _map[path] = li;
-                    else
-                        throw LibraryLoadException(std::string("Manifest class mismatch in ") + path, manifest);
-                }
-                else
-                    throw LibraryLoadException(std::string("No manifest in ") + path, manifest);
-            }
-            catch (...)
-            {
-                delete li.pLibrary;
-                delete li.pManifest;
-                throw;
-            }
-        }
-        else
-        {
-            ++it->second.refCount;
-        }
-    }
-
-    void loadLibrary(const std::string & path)
-    /// Loads a library from the given path. Does nothing
-    /// if the library is already loaded.
-    /// Throws a LibraryLoadException if the library
-    /// cannot be loaded or does not have a Manifest.
-    /// If the library exports a function named "pocoInitializeLibrary",
-    /// this function is executed.
-    /// If called multiple times for the same library,
-    /// the number of calls to unloadLibrary() must be the same
-    /// for the library to become unloaded.
-    ///
-    /// Equivalent to loadLibrary(path, "").
-    {
-        loadLibrary(path, "");
-    }
-
-    void unloadLibrary(const std::string & path)
-    /// Unloads the given library.
-    /// Be extremely cautious when unloading shared libraries.
-    /// If objects from the library are still referenced somewhere,
-    /// a total crash is very likely.
-    /// If the library exports a function named "pocoUninitializeLibrary",
-    /// this function is executed before it is unloaded.
-    /// If loadLibrary() has been called multiple times for the same
-    /// library, the number of calls to unloadLibrary() must be the same
-    /// for the library to become unloaded.
-    {
-        FastMutex::ScopedLock lock(_mutex);
-
-        typename LibraryMap::iterator it = _map.find(path);
-        if (it != _map.end())
-        {
-            if (--it->second.refCount == 0)
-            {
-                if (it->second.pLibrary->hasSymbol("pocoUninitializeLibrary"))
-                {
-                    UninitializeLibraryFunc uninitializeLibrary
-                        = (UninitializeLibraryFunc)it->second.pLibrary->getSymbol("pocoUninitializeLibrary");
-                    uninitializeLibrary();
-                }
-                delete it->second.pManifest;
-                it->second.pLibrary->unload();
-                delete it->second.pLibrary;
-                _map.erase(it);
-            }
-        }
-        else
-            throw NotFoundException(path);
-    }
-
-    const Meta * findClass(const std::string & className) const
-    /// Returns a pointer to the MetaObject for the given
-    /// class, or a null pointer if the class is not known.
-    {
-        FastMutex::ScopedLock lock(_mutex);
-
-        for (typename LibraryMap::const_iterator it = _map.begin(); it != _map.end(); ++it)
-        {
-            const Manif * pManif = it->second.pManifest;
-            typename Manif::Iterator itm = pManif->find(className);
-            if (itm != pManif->end())
-                return *itm;
-        }
-        return 0;
-    }
-
-    const Meta & classFor(const std::string & className) const
-    /// Returns a reference to the MetaObject for the given
-    /// class. Throws a NotFoundException if the class
-    /// is not known.
-    {
-        const Meta * pMeta = findClass(className);
-        if (pMeta)
-            return *pMeta;
-        else
-            throw NotFoundException(className);
-    }
-
-    Base * create(const std::string & className) const
-    /// Creates an instance of the given class.
-    /// Throws a NotFoundException if the class
-    /// is not known.
-    {
-        return classFor(className).create();
-    }
-
-    Base & instance(const std::string & className) const
-    /// Returns a reference to the sole instance of
-    /// the given class. The class must be a singleton,
-    /// otherwise an InvalidAccessException will be thrown.
-    /// Throws a NotFoundException if the class
-    /// is not known.
-    {
-        return classFor(className).instance();
-    }
-
-    bool canCreate(const std::string & className) const
-    /// Returns true if create() can create new instances
-    /// of the class.
-    {
-        return classFor(className).canCreate();
-    }
-
-    void destroy(const std::string & className, Base * pObject) const
-    /// Destroys the object pObject points to.
-    /// Does nothing if object is not found.
-    {
-        classFor(className).destroy(pObject);
-    }
-
-    bool isAutoDelete(const std::string & className, Base * pObject) const
-    /// Returns true if the object is automatically
-    /// deleted by its meta object.
-    {
-        return classFor(className).isAutoDelete(pObject);
-    }
-
-    const Manif * findManifest(const std::string & path) const
-    /// Returns a pointer to the Manifest for the given
-    /// library, or a null pointer if the library has not been loaded.
-    {
-        FastMutex::ScopedLock lock(_mutex);
-
-        typename LibraryMap::const_iterator it = _map.find(path);
-        if (it != _map.end())
-            return it->second.pManifest;
-        else
-            return 0;
-    }
-
-    const Manif & manifestFor(const std::string & path) const
-    /// Returns a reference to the Manifest for the given library
-    /// Throws a NotFoundException if the library has not been loaded.
-    {
-        const Manif * pManif = findManifest(path);
-        if (pManif)
-            return *pManif;
-        else
-            throw NotFoundException(path);
-    }
-
-    bool isLibraryLoaded(const std::string & path) const
-    /// Returns true if the library with the given name
-    /// has already been loaded.
-    {
-        return findManifest(path) != 0;
-    }
-
-    Iterator begin() const
-    {
-        FastMutex::ScopedLock lock(_mutex);
-
-        return Iterator(_map.begin());
-    }
-
-    Iterator end() const
-    {
-        FastMutex::ScopedLock lock(_mutex);
-
-        return Iterator(_map.end());
-    }
-
-private:
-    LibraryMap _map;
-    mutable FastMutex _mutex;
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_ClassLoader_INCLUDED
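For reference, a minimal usage sketch of the ClassLoader API removed above. PluginBase, MyPlugin, and the library name are hypothetical; the load/create/unload calls and the reference-counting behavior follow the documentation comments in the deleted header.

#include "Poco/ClassLoader.h"

class PluginBase
{
public:
    virtual ~PluginBase() = default;
};

void pluginDemo()
{
    Poco::ClassLoader<PluginBase> loader;
    loader.loadLibrary("libMyPlugin.so"); // first call actually loads; refCount = 1
    loader.loadLibrary("libMyPlugin.so"); // same path only bumps refCount to 2

    if (loader.findClass("MyPlugin")) // null pointer if the class is unknown
    {
        PluginBase * p = loader.create("MyPlugin");
        loader.destroy("MyPlugin", p);
    }

    loader.unloadLibrary("libMyPlugin.so"); // refCount 2 -> 1, library stays loaded
    loader.unloadLibrary("libMyPlugin.so"); // refCount 1 -> 0, library is unloaded
}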
diff --git a/base/poco/Foundation/include/Poco/EventLogChannel.h b/base/poco/Foundation/include/Poco/EventLogChannel.h
deleted file mode 100644
index e6a79088204..00000000000
--- a/base/poco/Foundation/include/Poco/EventLogChannel.h
+++ /dev/null
@@ -1,102 +0,0 @@
-//
-// EventLogChannel.h
-//
-// Library: Foundation
-// Package: Logging
-// Module: EventLogChannel
-//
-// Definition of the EventLogChannel class specific to WIN32.
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_EventLogChannel_INCLUDED
-#define Foundation_EventLogChannel_INCLUDED
-
-
-#include "Poco/Channel.h"
-#include "Poco/Foundation.h"
-#include "Poco/UnWindows.h"
-
-
-namespace Poco
-{
-
-
-class Foundation_API EventLogChannel : public Channel
-/// This Windows-only channel works with the Windows NT Event Log
-/// service.
-///
-/// To work properly, the EventLogChannel class requires that either
-/// the PocoFoundation.dll or the PocoMsg.dll Dynamic Link Library
-/// containing the message definition resources can be found in $PATH.
-{
-public:
-    EventLogChannel();
-    /// Creates the EventLogChannel.
-    /// The name of the current application (or more correctly,
-    /// the name of its executable) is taken as event source name.
-
-    EventLogChannel(const std::string & name);
-    /// Creates the EventLogChannel with the given event source name.
-
-    EventLogChannel(const std::string & name, const std::string & host);
-    /// Creates an EventLogChannel with the given event source
-    /// name that routes messages to the given host.
-
-    void open();
-    /// Opens the EventLogChannel. If necessary, the
-    /// required registry entries to register a
-    /// message resource DLL are made.
-
-    void close();
-    /// Closes the EventLogChannel.
-
-    void log(const Message & msg);
-    /// Logs the given message to the Windows Event Log.
-    ///
-    /// The message type and priority are mapped to
-    /// appropriate values for Event Log type and category.
-
-    void setProperty(const std::string & name, const std::string & value);
-    /// Sets or changes a configuration property.
-    ///
-    /// The following properties are supported:
-    ///
-    ///   * name:    The name of the event source.
-    ///   * loghost: The name of the host where the Event Log service is running.
-    ///              The default is "localhost".
-    ///   * host:    same as loghost.
-    ///   * logfile: The name of the log file. The default is "Application".
-
-    std::string getProperty(const std::string & name) const;
-    /// Returns the value of the given property.
-
-    static const std::string PROP_NAME;
-    static const std::string PROP_HOST;
-    static const std::string PROP_LOGHOST;
-    static const std::string PROP_LOGFILE;
-
-protected:
-    ~EventLogChannel();
-    static int getType(const Message & msg);
-    static int getCategory(const Message & msg);
-    void setUpRegistry() const;
-    static std::string findLibrary(const char * name);
-
-private:
-    std::string _name;
-    std::string _host;
-    std::string _logFile;
-    HANDLE _h;
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_EventLogChannel_INCLUDED
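A hedged configuration sketch for the channel above (Windows-only; the event source name is hypothetical, and the properties are the ones listed in the deleted header):

#include "Poco/AutoPtr.h"
#include "Poco/EventLogChannel.h"
#include "Poco/Logger.h"

void setupEventLog()
{
    Poco::AutoPtr<Poco::EventLogChannel> pChannel(new Poco::EventLogChannel("MyService"));
    pChannel->setProperty("loghost", "localhost");   // where the Event Log service runs
    pChannel->setProperty("logfile", "Application"); // the default log file
    Poco::Logger::root().setChannel(pChannel);
    Poco::Logger::root().information("service started");
}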
diff --git a/base/poco/Foundation/include/Poco/FPEnvironment_DUMMY.h b/base/poco/Foundation/include/Poco/FPEnvironment_DUMMY.h
deleted file mode 100644
index 3d8f3612a7a..00000000000
--- a/base/poco/Foundation/include/Poco/FPEnvironment_DUMMY.h
+++ /dev/null
@@ -1,126 +0,0 @@
-//
-// FPEnvironment_DUMMY.h
-//
-// Library: Foundation
-// Package: Core
-// Module: FPEnvironment
-//
-// Definition of class FPEnvironmentImpl for platforms that do not
-// support IEEE 754 extensions.
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_FPEnvironment_DUMMY_INCLUDED
-#define Foundation_FPEnvironment_DUMMY_INCLUDED
-
-
-#include <cmath>
-#include "Poco/Foundation.h"
-
-
-namespace Poco
-{
-
-
-class Foundation_API FPEnvironmentImpl
-{
-protected:
-    enum RoundingModeImpl
-    {
-        FP_ROUND_DOWNWARD_IMPL,
-        FP_ROUND_UPWARD_IMPL,
-        FP_ROUND_TONEAREST_IMPL,
-        FP_ROUND_TOWARDZERO_IMPL
-    };
-    enum FlagImpl
-    {
-        FP_DIVIDE_BY_ZERO_IMPL,
-        FP_INEXACT_IMPL,
-        FP_OVERFLOW_IMPL,
-        FP_UNDERFLOW_IMPL,
-        FP_INVALID_IMPL
-    };
-    FPEnvironmentImpl();
-    FPEnvironmentImpl(const FPEnvironmentImpl & env);
-    ~FPEnvironmentImpl();
-    FPEnvironmentImpl & operator=(const FPEnvironmentImpl & env);
-    void keepCurrentImpl();
-    static void clearFlagsImpl();
-    static bool isFlagImpl(FlagImpl flag);
-    static void setRoundingModeImpl(RoundingModeImpl mode);
-    static RoundingModeImpl getRoundingModeImpl();
-    static bool isInfiniteImpl(float value);
-    static bool isInfiniteImpl(double value);
-    static bool isInfiniteImpl(long double value);
-    static bool isNaNImpl(float value);
-    static bool isNaNImpl(double value);
-    static bool isNaNImpl(long double value);
-    static float copySignImpl(float target, float source);
-    static double copySignImpl(double target, double source);
-    static long double copySignImpl(long double target, long double source);
-
-private:
-    static RoundingModeImpl _roundingMode;
-};
-
-
-//
-// inlines
-//
-inline bool FPEnvironmentImpl::isInfiniteImpl(float value)
-{
-    return std::isinf(value) != 0;
-}
-
-
-inline bool FPEnvironmentImpl::isInfiniteImpl(double value)
-{
-    return std::isinf(value) != 0;
-}
-
-
-inline bool FPEnvironmentImpl::isInfiniteImpl(long double value)
-{
-    return std::isinf((double)value) != 0;
-}
-
-
-inline bool FPEnvironmentImpl::isNaNImpl(float value)
-{
-    return std::isnan(value) != 0;
-}
-
-
-inline bool FPEnvironmentImpl::isNaNImpl(double value)
-{
-    return std::isnan(value) != 0;
-}
-
-
-inline bool FPEnvironmentImpl::isNaNImpl(long double value)
-{
-    return std::isnan((double)value) != 0;
-}
-
-
-inline float FPEnvironmentImpl::copySignImpl(float target, float source)
-{
-    return copysignf(target, source);
-}
-
-
-inline double FPEnvironmentImpl::copySignImpl(double target, double source)
-{
-    return copysign(target, source);
-}
-
-
-} // namespace Poco
-
-
-#endif // Foundation_FPEnvironment_DUMMY_INCLUDED
diff --git a/base/poco/Foundation/include/Poco/FileStream_WIN32.h b/base/poco/Foundation/include/Poco/FileStream_WIN32.h
deleted file mode 100644
index 7d6670b9315..00000000000
--- a/base/poco/Foundation/include/Poco/FileStream_WIN32.h
+++ /dev/null
@@ -1,72 +0,0 @@
-//
-// FileStream_WIN32.h
-//
-// Library: Foundation
-// Package: Streams
-// Module: FileStream
-//
-// Definition of the FileStreamBuf, FileInputStream and FileOutputStream classes.
-//
-// Copyright (c) 2007, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_FileStream_WIN32_INCLUDED
-#define Foundation_FileStream_WIN32_INCLUDED
-
-
-#include "Poco/BufferedBidirectionalStreamBuf.h"
-#include "Poco/Foundation.h"
-#include "Poco/UnWindows.h"
-
-
-namespace Poco
-{
-
-
-class Foundation_API FileStreamBuf : public BufferedBidirectionalStreamBuf
-/// This stream buffer handles file I/O.
-{
-public:
-    FileStreamBuf();
-    /// Creates a FileStreamBuf.
-
-    ~FileStreamBuf();
-    /// Destroys the FileStream.
-
-    void open(const std::string & path, std::ios::openmode mode);
-    /// Opens the given file in the given mode.
-
-    bool close();
-    /// Closes the File stream buffer. Returns true if successful,
-    /// false otherwise.
-
-    std::streampos seekoff(std::streamoff off, std::ios::seekdir dir, std::ios::openmode mode = std::ios::in | std::ios::out);
-    /// Changes position by offset, according to way and mode.
-
-    std::streampos seekpos(std::streampos pos, std::ios::openmode mode = std::ios::in | std::ios::out);
-    /// Changes to the specified position, according to mode.
-
-protected:
-    enum
-    {
-        BUFFER_SIZE = 4096
-    };
-
-    int readFromDevice(char * buffer, std::streamsize length);
-    int writeToDevice(const char * buffer, std::streamsize length);
-
-private:
-    std::string _path;
-    HANDLE _handle;
-    UInt64 _pos;
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_FileStream_WIN32_INCLUDED
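The header above is the Windows backend for the public file stream classes declared in "Poco/FileStream.h"; a minimal sketch of their use (the file name is arbitrary):

#include <string>
#include "Poco/FileStream.h"

void fileStreamDemo()
{
    Poco::FileOutputStream out("example.txt"); // the platform FileStreamBuf opens the file
    out << "hello" << std::endl;
    out.close();

    Poco::FileInputStream in("example.txt");
    std::string line;
    std::getline(in, line); // line == "hello"
}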
diff --git a/base/poco/Foundation/include/Poco/HashSet.h b/base/poco/Foundation/include/Poco/HashSet.h
deleted file mode 100644
index 8082b2813f4..00000000000
--- a/base/poco/Foundation/include/Poco/HashSet.h
+++ /dev/null
@@ -1,176 +0,0 @@
-//
-// HashSet.h
-//
-// Library: Foundation
-// Package: Hashing
-// Module: HashSet
-//
-// Definition of the HashSet class.
-//
-// Copyright (c) 2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_HashSet_INCLUDED
-#define Foundation_HashSet_INCLUDED
-
-
-#include "Poco/Foundation.h"
-#include "Poco/LinearHashTable.h"
-
-
-namespace Poco
-{
-
-
-template <class Value, class HashFunc = Hash<Value>>
-class HashSet
-/// This class implements a set using a LinearHashTable.
-///
-/// A HashSet can be used just like a std::set.
-{
-public:
-    typedef Value ValueType;
-    typedef Value & Reference;
-    typedef const Value & ConstReference;
-    typedef Value * Pointer;
-    typedef const Value * ConstPointer;
-    typedef HashFunc Hash;
-
-    typedef LinearHashTable<ValueType, Hash> HashTable;
-
-    typedef typename HashTable::Iterator Iterator;
-    typedef typename HashTable::ConstIterator ConstIterator;
-
-    HashSet()
-    /// Creates an empty HashSet.
-    {
-    }
-
-    HashSet(std::size_t initialReserve) : _table(initialReserve)
-    /// Creates the HashSet, using the given initialReserve.
-    {
-    }
-
-    HashSet(const HashSet & set) : _table(set._table)
-    /// Creates the HashSet by copying another one.
-    {
-    }
-
-    ~HashSet()
-    /// Destroys the HashSet.
-    {
-    }
-
-    HashSet & operator=(const HashSet & table)
-    /// Assigns another HashSet.
-    {
-        HashSet tmp(table);
-        swap(tmp);
-        return *this;
-    }
-
-    void swap(HashSet & set)
-    /// Swaps the HashSet with another one.
-    {
-        _table.swap(set._table);
-    }
-
-    ConstIterator begin() const
-    /// Returns an iterator pointing to the first entry, if one exists.
-    {
-        return _table.begin();
-    }
-
-    ConstIterator end() const
-    /// Returns an iterator pointing to the end of the table.
-    {
-        return _table.end();
-    }
-
-    Iterator begin()
-    /// Returns an iterator pointing to the first entry, if one exists.
-    {
-        return _table.begin();
-    }
-
-    Iterator end()
-    /// Returns an iterator pointing to the end of the table.
-    {
-        return _table.end();
-    }
-
-    ConstIterator find(const ValueType & value) const
-    /// Finds an entry in the table.
-    {
-        return _table.find(value);
-    }
-
-    Iterator find(const ValueType & value)
-    /// Finds an entry in the table.
-    {
-        return _table.find(value);
-    }
-
-    std::size_t count(const ValueType & value) const
-    /// Returns the number of elements with the given
-    /// value, which is either 1 or 0.
-    {
-        return _table.count(value);
-    }
-
-    std::pair<Iterator, bool> insert(const ValueType & value)
-    /// Inserts an element into the set.
-    ///
-    /// If the element already exists in the set,
-    /// a pair(iterator, false) with iterator pointing to the
-    /// existing element is returned.
-    /// Otherwise, the element is inserted and a
-    /// pair(iterator, true) with iterator
-    /// pointing to the new element is returned.
-    {
-        return _table.insert(value);
-    }
-
-    void erase(Iterator it)
-    /// Erases the element pointed to by it.
-    {
-        _table.erase(it);
-    }
-
-    void erase(const ValueType & value)
-    /// Erases the element with the given value, if it exists.
-    {
-        _table.erase(value);
-    }
-
-    void clear()
-    /// Erases all elements.
-    {
-        _table.clear();
-    }
-
-    std::size_t size() const
-    /// Returns the number of elements in the table.
-    {
-        return _table.size();
-    }
-
-    bool empty() const
-    /// Returns true iff the table is empty.
-    {
-        return _table.empty();
-    }
-
-private:
-    HashTable _table;
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_HashSet_INCLUDED
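A short sketch of the std::set-like interface documented above:

#include "Poco/HashSet.h"

void hashSetDemo()
{
    Poco::HashSet<int> set;
    std::pair<Poco::HashSet<int>::Iterator, bool> r = set.insert(42);
    // r.second is true on first insertion and false for a duplicate
    set.insert(42);
    std::size_t n = set.count(42); // 1, as documented above
    bool found = set.find(42) != set.end();
    set.erase(42);
    (void)n;
    (void)found;
}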
diff --git a/base/poco/Foundation/include/Poco/HashTable.h b/base/poco/Foundation/include/Poco/HashTable.h
deleted file mode 100644
index b160c9c2d6b..00000000000
--- a/base/poco/Foundation/include/Poco/HashTable.h
+++ /dev/null
@@ -1,352 +0,0 @@
-//
-// HashTable.h
-//
-// Library: Foundation
-// Package: Hashing
-// Module: HashTable
-//
-// Definition of the HashTable class.
-//
-// Copyright (c) 2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_HashTable_INCLUDED
-#define Foundation_HashTable_INCLUDED
-
-
-#include <cstddef>
-#include <cstring>
-#include <map>
-#include <vector>
-#include "Poco/Exception.h"
-#include "Poco/Foundation.h"
-#include "Poco/HashFunction.h"
-#include "Poco/HashStatistic.h"
-
-
-namespace Poco
-{
-
-
-//@ deprecated
-template <class Key, class Value, class KeyHashFunction = HashFunction<Key>>
-class HashTable
-/// A HashTable stores a key value pair that can be looked up via a hashed key.
-///
-/// Collision handling is done via overflow maps(!). With small hash tables performance of this
-/// data struct will be closer to that of a map than a hash table, i.e. slower. On the plus side,
-/// this class offers remove operations. Also HashTable full errors are not possible. If a fast
-/// HashTable implementation is needed and the remove operation is not required, use SimpleHashTable
-/// instead.
-///
-/// This class is NOT thread safe.
-{
-public:
-    typedef std::map<Key, Value> HashEntryMap;
-    typedef HashEntryMap ** HashTableVector;
-
-    typedef typename HashEntryMap::const_iterator ConstIterator;
-    typedef typename HashEntryMap::iterator Iterator;
-
-    HashTable(UInt32 initialSize = 251) : _entries(0), _size(0), _maxCapacity(initialSize)
-    /// Creates the HashTable.
-    {
-        _entries = new HashEntryMap *[initialSize];
-        memset(_entries, '\0', sizeof(HashEntryMap *) * initialSize);
-    }
-
-    HashTable(const HashTable & ht) : _entries(new HashEntryMap *[ht._maxCapacity]), _size(ht._size), _maxCapacity(ht._maxCapacity)
-    {
-        for (UInt32 i = 0; i < _maxCapacity; ++i)
-        {
-            if (ht._entries[i])
-                _entries[i] = new HashEntryMap(ht._entries[i]->begin(), ht._entries[i]->end());
-            else
-                _entries[i] = 0;
-        }
-    }
-
-    ~HashTable()
-    /// Destroys the HashTable.
-    {
-        clear();
-    }
-
-    HashTable & operator=(const HashTable & ht)
-    {
-        if (this != &ht)
-        {
-            clear();
-            _maxCapacity = ht._maxCapacity;
-            poco_assert_dbg(_entries == 0);
-            _entries = new HashEntryMap *[_maxCapacity];
-            _size = ht._size;
-
-            for (UInt32 i = 0; i < _maxCapacity; ++i)
-            {
-                if (ht._entries[i])
-                    _entries[i] = new HashEntryMap(ht._entries[i]->begin(), ht._entries[i]->end());
-                else
-                    _entries[i] = 0;
-            }
-        }
-        return *this;
-    }
-
-    void clear()
-    {
-        if (!_entries)
-            return;
-        for (UInt32 i = 0; i < _maxCapacity; ++i)
-        {
-            delete _entries[i];
-        }
-        delete[] _entries;
-        _entries = 0;
-        _size = 0;
-        _maxCapacity = 0;
-    }
-
-    UInt32 insert(const Key & key, const Value & value)
-    /// Returns the hash value of the inserted item.
-    /// Throws an exception if the entry was already inserted
-    {
-        UInt32 hsh = hash(key);
-        insertRaw(key, hsh, value);
-        return hsh;
-    }
-
-    Value & insertRaw(const Key & key, UInt32 hsh, const Value & value)
-    /// Returns the hash value of the inserted item.
-    /// Throws an exception if the entry was already inserted
-    {
-        if (!_entries[hsh])
-            _entries[hsh] = new HashEntryMap();
-        std::pair<typename HashEntryMap::iterator, bool> res(_entries[hsh]->insert(std::make_pair(key, value)));
-        if (!res.second)
-            throw InvalidArgumentException("HashTable::insert, key already exists.");
-        _size++;
-        return res.first->second;
-    }
-
-    UInt32 update(const Key & key, const Value & value)
-    /// Returns the hash value of the inserted item.
-    /// Replaces an existing entry if it finds one
-    {
-        UInt32 hsh = hash(key);
-        updateRaw(key, hsh, value);
-        return hsh;
-    }
-
-    void updateRaw(const Key & key, UInt32 hsh, const Value & value)
-    /// Returns the hash value of the inserted item.
-    /// Replaces an existing entry if it finds one
-    {
-        if (!_entries[hsh])
-            _entries[hsh] = new HashEntryMap();
-        std::pair<typename HashEntryMap::iterator, bool> res = _entries[hsh]->insert(std::make_pair(key, value));
-        if (res.second == false)
-            res.first->second = value;
-        else
-            _size++;
-    }
-
-    void remove(const Key & key)
-    {
-        UInt32 hsh = hash(key);
-        removeRaw(key, hsh);
-    }
-
-    void removeRaw(const Key & key, UInt32 hsh)
-    /// Performance version, allows to specify the hash value
-    {
-        if (_entries[hsh])
-        {
-            _size -= _entries[hsh]->erase(key);
-        }
-    }
-
-    UInt32 hash(const Key & key) const { return _hash(key, _maxCapacity); }
-
-    const Value & get(const Key & key) const
-    /// Throws an exception if the value does not exist
-    {
-        UInt32 hsh = hash(key);
-        return getRaw(key, hsh);
-    }
-
-    const Value & getRaw(const Key & key, UInt32 hsh) const
-    /// Throws an exception if the value does not exist
-    {
-        if (!_entries[hsh])
-            throw InvalidArgumentException("key not found");
-
-        ConstIterator it = _entries[hsh]->find(key);
-        if (it == _entries[hsh]->end())
-            throw InvalidArgumentException("key not found");
-
-        return it->second;
-    }
-
-    Value & get(const Key & key)
-    /// Throws an exception if the value does not exist
-    {
-        UInt32 hsh = hash(key);
-        return const_cast<Value &>(getRaw(key, hsh));
-    }
-
-    const Value & operator[](const Key & key) const { return get(key); }
-
-    Value & operator[](const Key & key)
-    {
-        UInt32 hsh = hash(key);
-
-        if (!_entries[hsh])
-            return insertRaw(key, hsh, Value());
-
-        Iterator it = _entries[hsh]->find(key);
-        if (it == _entries[hsh]->end())
-            return insertRaw(key, hsh, Value());
-
-        return it->second;
-    }
-
-    const Key & getKeyRaw(const Key & key, UInt32 hsh)
-    /// Throws an exception if the key does not exist. Returns a reference to the internally
-    /// stored key. Useful when someone does an insert and wants for performance reason only to store
-    /// a pointer to the key in another collection
-    {
-        if (!_entries[hsh])
-            throw InvalidArgumentException("key not found");
-        ConstIterator it = _entries[hsh]->find(key);
-        if (it == _entries[hsh]->end())
-            throw InvalidArgumentException("key not found");
-        return it->first;
-    }
-
-    bool get(const Key & key, Value & v) const
-    /// Sets v to the found value, returns false if no value was found
-    {
-        UInt32 hsh = hash(key);
-        return getRaw(key, hsh, v);
-    }
-
-    bool getRaw(const Key & key, UInt32 hsh, Value & v) const
-    /// Sets v to the found value, returns false if no value was found
-    {
-        if (!_entries[hsh])
-            return false;
-
-        ConstIterator it = _entries[hsh]->find(key);
-        if (it == _entries[hsh]->end())
-            return false;
-
-        v = it->second;
-        return true;
-    }
-
-    bool exists(const Key & key)
-    {
-        UInt32 hsh = hash(key);
-        return existsRaw(key, hsh);
-    }
-
-    bool existsRaw(const Key & key, UInt32 hsh) { return _entries[hsh] && (_entries[hsh]->end() != _entries[hsh]->find(key)); }
-
-    std::size_t size() const
-    /// Returns the number of elements already inserted into the HashTable
-    {
-        return _size;
-    }
-
-    UInt32 maxCapacity() const { return _maxCapacity; }
-
-    void resize(UInt32 newSize)
-    /// Resizes the hashtable, rehashes all existing entries. Expensive!
-    {
-        if (_maxCapacity != newSize)
-        {
-            HashTableVector cpy = _entries;
-            _entries = 0;
-            UInt32 oldSize = _maxCapacity;
-            _maxCapacity = newSize;
-            _entries = new HashEntryMap *[_maxCapacity];
-            memset(_entries, '\0', sizeof(HashEntryMap *) * _maxCapacity);
-
-            if (_size == 0)
-            {
-                // no data was yet inserted
-                delete[] cpy;
-                return;
-            }
-            _size = 0;
-            for (UInt32 i = 0; i < oldSize; ++i)
-            {
-                if (cpy[i])
-                {
-                    ConstIterator it = cpy[i]->begin();
-                    ConstIterator itEnd = cpy[i]->end();
-                    for (; it != itEnd; ++it)
-                    {
-                        insert(it->first, it->second);
-                    }
-                    delete cpy[i];
-                }
-            }
-            delete[] cpy;
-        }
-    }
-
-    HashStatistic currentState(bool details = false) const
-    /// Returns the current internal state
-    {
-        UInt32 numberOfEntries = (UInt32)_size;
-        UInt32 numZeroEntries = 0;
-        UInt32 maxEntriesPerHash = 0;
-        std::vector<UInt32> detailedEntriesPerHash;
-#ifdef _DEBUG
-        UInt32 totalSize = 0;
-#endif
-        for (UInt32 i = 0; i < _maxCapacity; ++i)
-        {
-            if (_entries[i])
-            {
-                UInt32 size = (UInt32)_entries[i]->size();
-                poco_assert_dbg(size != 0);
-                if (size > maxEntriesPerHash)
-                    maxEntriesPerHash = size;
-                if (details)
-                    detailedEntriesPerHash.push_back(size);
-#ifdef _DEBUG
-                totalSize += size;
-#endif
-            }
-            else
-            {
-                numZeroEntries++;
-                if (details)
-                    detailedEntriesPerHash.push_back(0);
-            }
-        }
-#ifdef _DEBUG
-        poco_assert_dbg(totalSize == numberOfEntries);
-#endif
-        return HashStatistic(_maxCapacity, numberOfEntries, numZeroEntries, maxEntriesPerHash, detailedEntriesPerHash);
-    }
-
-private:
-    HashTableVector _entries;
-    std::size_t _size;
-    UInt32 _maxCapacity;
-    KeyHashFunction _hash;
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_HashTable_INCLUDED
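A small sketch of the overflow-map design documented above, assuming the default HashFunction supports std::string keys as elsewhere in Poco:

#include <string>
#include "Poco/HashTable.h"

void hashTableDemo()
{
    // Each bucket is a std::map (the "overflow map"), so heavy collisions degrade
    // towards std::map performance, but remove() is supported and the table never fills up.
    Poco::HashTable<std::string, int> table(251);
    table.insert("one", 1);  // throws InvalidArgumentException for a duplicate key
    table.update("one", 11); // replaces the existing entry
    int v = 0;
    if (table.get("one", v)) // non-throwing lookup variant
        table.remove("one"); // v == 11 here
}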
diff --git a/base/poco/Foundation/include/Poco/Latin1Encoding.h b/base/poco/Foundation/include/Poco/Latin1Encoding.h
deleted file mode 100644
index 279ecd477f4..00000000000
--- a/base/poco/Foundation/include/Poco/Latin1Encoding.h
+++ /dev/null
@@ -1,52 +0,0 @@
-//
-// Latin1Encoding.h
-//
-// Library: Foundation
-// Package: Text
-// Module: Latin1Encoding
-//
-// Definition of the Latin1Encoding class.
-//
-// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_Latin1Encoding_INCLUDED
-#define Foundation_Latin1Encoding_INCLUDED
-
-
-#include "Poco/Foundation.h"
-#include "Poco/TextEncoding.h"
-
-
-namespace Poco
-{
-
-
-class Foundation_API Latin1Encoding : public TextEncoding
-/// ISO Latin-1 (8859-1) text encoding.
-{
-public:
-    Latin1Encoding();
-    ~Latin1Encoding();
-    const char * canonicalName() const;
-    bool isA(const std::string & encodingName) const;
-    const CharacterMap & characterMap() const;
-    int convert(const unsigned char * bytes) const;
-    int convert(int ch, unsigned char * bytes, int length) const;
-    int queryConvert(const unsigned char * bytes, int length) const;
-    int sequenceLength(const unsigned char * bytes, int length) const;
-
-private:
-    static const char * _names[];
-    static const CharacterMap _charMap;
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_Latin1Encoding_INCLUDED
diff --git a/base/poco/Foundation/include/Poco/Latin2Encoding.h b/base/poco/Foundation/include/Poco/Latin2Encoding.h
deleted file mode 100644
index b86c183de17..00000000000
--- a/base/poco/Foundation/include/Poco/Latin2Encoding.h
+++ /dev/null
@@ -1,55 +0,0 @@
-//
-// Latin2Encoding.h
-//
-// Library: Foundation
-// Package: Text
-// Module: Latin2Encoding
-//
-// Definition of the Latin2Encoding class.
-//
-// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_Latin2Encoding_INCLUDED
-#define Foundation_Latin2Encoding_INCLUDED
-
-
-#include "Poco/Foundation.h"
-#include "Poco/TextEncoding.h"
-
-
-namespace Poco
-{
-
-
-class Foundation_API Latin2Encoding : public TextEncoding
-/// ISO Latin-2 (8859-2) text encoding.
-///
-/// Latin-2 covers the Central and Eastern European languages;
-/// it is basically Latin-1 with some characters replaced.
-{
-public:
-    Latin2Encoding();
-    virtual ~Latin2Encoding();
-    const char * canonicalName() const;
-    bool isA(const std::string & encodingName) const;
-    const CharacterMap & characterMap() const;
-    int convert(const unsigned char * bytes) const;
-    int convert(int ch, unsigned char * bytes, int length) const;
-    int queryConvert(const unsigned char * bytes, int length) const;
-    int sequenceLength(const unsigned char * bytes, int length) const;
-
-private:
-    static const char * _names[];
-    static const CharacterMap _charMap;
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_Latin2Encoding_INCLUDED
diff --git a/base/poco/Foundation/include/Poco/Latin9Encoding.h b/base/poco/Foundation/include/Poco/Latin9Encoding.h
deleted file mode 100644
index db672e15bba..00000000000
--- a/base/poco/Foundation/include/Poco/Latin9Encoding.h
+++ /dev/null
@@ -1,55 +0,0 @@
-//
-// Latin9Encoding.h
-//
-// Library: Foundation
-// Package: Text
-// Module: Latin9Encoding
-//
-// Definition of the Latin9Encoding class.
-//
-// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_Latin9Encoding_INCLUDED
-#define Foundation_Latin9Encoding_INCLUDED
-
-
-#include "Poco/Foundation.h"
-#include "Poco/TextEncoding.h"
-
-
-namespace Poco
-{
-
-
-class Foundation_API Latin9Encoding : public TextEncoding
-/// ISO Latin-9 (8859-15) text encoding.
-///
-/// Latin-9 is basically Latin-1 with the EURO sign plus
-/// some other minor changes.
-{
-public:
-    Latin9Encoding();
-    ~Latin9Encoding();
-    const char * canonicalName() const;
-    bool isA(const std::string & encodingName) const;
-    const CharacterMap & characterMap() const;
-    int convert(const unsigned char * bytes) const;
-    int convert(int ch, unsigned char * bytes, int length) const;
-    int queryConvert(const unsigned char * bytes, int length) const;
-    int sequenceLength(const unsigned char * bytes, int length) const;
-
-private:
-    static const char * _names[];
-    static const CharacterMap _charMap;
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_Latin9Encoding_INCLUDED
diff --git a/base/poco/Foundation/include/Poco/MD4Engine.h b/base/poco/Foundation/include/Poco/MD4Engine.h
deleted file mode 100644
index a772726955e..00000000000
--- a/base/poco/Foundation/include/Poco/MD4Engine.h
+++ /dev/null
@@ -1,96 +0,0 @@
-//
-// MD4Engine.h
-//
-// Library: Foundation
-// Package: Crypt
-// Module: MD4Engine
-//
-// Definition of class MD4Engine.
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-//
-// MD4 (RFC 1320) algorithm:
-// Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
-// rights reserved.
-//
-// License to copy and use this software is granted provided that it
-// is identified as the "RSA Data Security, Inc. MD4 Message-Digest
-// Algorithm" in all material mentioning or referencing this software
-// or this function.
-//
-// License is also granted to make and use derivative works provided
-// that such works are identified as "derived from the RSA Data
-// Security, Inc. MD4 Message-Digest Algorithm" in all material
-// mentioning or referencing the derived work.
-//
-// RSA Data Security, Inc. makes no representations concerning either
-// the merchantability of this software or the suitability of this
-// software for any particular purpose. It is provided "as is"
-// without express or implied warranty of any kind.
-//
-// These notices must be retained in any copies of any part of this
-// documentation and/or software.
-//
-
-
-#ifndef Foundation_MD4Engine_INCLUDED
-#define Foundation_MD4Engine_INCLUDED
-
-
-#include "Poco/DigestEngine.h"
-#include "Poco/Foundation.h"
-
-
-namespace Poco
-{
-
-
-class Foundation_API MD4Engine : public DigestEngine
-/// This class implements the MD4 message digest algorithm,
-/// described in RFC 1320.
-{
-public:
-    enum
-    {
-        BLOCK_SIZE = 64,
-        DIGEST_SIZE = 16
-    };
-
-    MD4Engine();
-    ~MD4Engine();
-
-    std::size_t digestLength() const;
-    void reset();
-    const DigestEngine::Digest & digest();
-
-protected:
-    void updateImpl(const void * data, std::size_t length);
-
-private:
-    static void transform(UInt32 state[4], const unsigned char block[64]);
-    static void encode(unsigned char * output, const UInt32 * input, std::size_t len);
-    static void decode(UInt32 * output, const unsigned char * input, std::size_t len);
-
-    struct Context
-    {
-        UInt32 state[4]; // state (ABCD)
-        UInt32 count[2]; // number of bits, modulo 2^64 (lsb first)
-        unsigned char buffer[64]; // input buffer
-    };
-
-    Context _context;
-    DigestEngine::Digest _digest;
-
-    MD4Engine(const MD4Engine &);
-    MD4Engine & operator=(const MD4Engine &);
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_MD4Engine_INCLUDED
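A one-function sketch of the DigestEngine interface the class above implements:

#include <string>
#include "Poco/DigestEngine.h"
#include "Poco/MD4Engine.h"

std::string md4Hex(const std::string & data)
{
    Poco::MD4Engine md4;
    md4.update(data); // update() is inherited from DigestEngine
    return Poco::DigestEngine::digestToHex(md4.digest()); // 16-byte digest as 32 hex characters
}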
diff --git a/base/poco/Foundation/include/Poco/Manifest.h b/base/poco/Foundation/include/Poco/Manifest.h
deleted file mode 100644
index 1835d6e64ce..00000000000
--- a/base/poco/Foundation/include/Poco/Manifest.h
+++ /dev/null
@@ -1,152 +0,0 @@
-//
-// Manifest.h
-//
-// Library: Foundation
-// Package: SharedLibrary
-// Module: ClassLoader
-//
-// Definition of the Manifest class.
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_Manifest_INCLUDED
-#define Foundation_Manifest_INCLUDED
-
-
-#include <map>
-#include <typeinfo>
-#include "Poco/Foundation.h"
-#include "Poco/MetaObject.h"
-
-
-namespace Poco
-{
-
-
-class Foundation_API ManifestBase
-/// ManifestBase is a common base class for
-/// all instantiations of Manifest.
-{
-public:
-    ManifestBase();
-    virtual ~ManifestBase();
-
-    virtual const char * className() const = 0;
-    /// Returns the type name of the manifest's class.
-};
-
-
-template <class B>
-class Manifest : public ManifestBase
-/// A Manifest maintains a list of all classes
-/// contained in a dynamically loadable class
-/// library.
-/// Internally, the information is held
-/// in a map. An iterator is provided to
-/// iterate over all the classes in a Manifest.
-{
-public:
-    typedef AbstractMetaObject<B> Meta;
-    typedef std::map<std::string, const Meta *> MetaMap;
-
-    class Iterator
-    /// The Manifest's very own iterator class.
-    {
-    public:
-        Iterator(const typename MetaMap::const_iterator & it) { _it = it; }
-        Iterator(const Iterator & it) { _it = it._it; }
-        ~Iterator() { }
-        Iterator & operator=(const Iterator & it)
-        {
-            _it = it._it;
-            return *this;
-        }
-        inline bool operator==(const Iterator & it) const { return _it == it._it; }
-        inline bool operator!=(const Iterator & it) const { return _it != it._it; }
-        Iterator & operator++() // prefix
-        {
-            ++_it;
-            return *this;
-        }
-        Iterator operator++(int) // postfix
-        {
-            Iterator result(_it);
-            ++_it;
-            return result;
-        }
-        inline const Meta * operator*() const { return _it->second; }
-        inline const Meta * operator->() const { return _it->second; }
-
-    private:
-        typename MetaMap::const_iterator _it;
-    };
-
-    Manifest()
-    /// Creates an empty Manifest.
-    {
-    }
-
-    virtual ~Manifest()
-    /// Destroys the Manifest.
-    {
-        clear();
-    }
-
-    Iterator find(const std::string & className) const
-    /// Returns an iterator pointing to the MetaObject
-    /// for the given class. If the MetaObject cannot
-    /// be found, the iterator points to end().
-    {
-        return Iterator(_metaMap.find(className));
-    }
-
-    Iterator begin() const { return Iterator(_metaMap.begin()); }
-
-    Iterator end() const { return Iterator(_metaMap.end()); }
-
-    bool insert(const Meta * pMeta)
-    /// Inserts a MetaObject. Returns true if insertion
-    /// was successful, false if a class with the same
-    /// name already exists.
-    {
-        return _metaMap.insert(typename MetaMap::value_type(pMeta->name(), pMeta)).second;
-    }
-
-    void clear()
-    /// Removes all MetaObjects from the manifest.
-    {
-        for (typename MetaMap::iterator it = _metaMap.begin(); it != _metaMap.end(); ++it)
-        {
-            delete it->second;
-        }
-        _metaMap.clear();
-    }
-
-    int size() const
-    /// Returns the number of MetaObjects in the Manifest.
-    {
-        return int(_metaMap.size());
-    }
-
-    bool empty() const
-    /// Returns true iff the Manifest does not contain any MetaObjects.
-    {
-        return _metaMap.empty();
-    }
-
-    const char * className() const { return typeid(*this).name(); }
-
-private:
-    MetaMap _metaMap;
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_Manifest_INCLUDED
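A sketch of iterating the classes a library exported, per the Iterator documented above; PluginBase is the same hypothetical base class as in the ClassLoader sketch:

#include <iostream>
#include "Poco/Manifest.h"

class PluginBase; // hypothetical plugin base class

void dumpManifest(const Poco::Manifest<PluginBase> & manifest)
{
    // Walk all MetaObjects contained in the manifest.
    for (Poco::Manifest<PluginBase>::Iterator it = manifest.begin(); it != manifest.end(); ++it)
        std::cout << it->name() << '\n'; // operator->() yields the AbstractMetaObject
}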
diff --git a/base/poco/Foundation/include/Poco/PipeImpl_DUMMY.h b/base/poco/Foundation/include/Poco/PipeImpl_DUMMY.h
deleted file mode 100644
index c707e5f6f0f..00000000000
--- a/base/poco/Foundation/include/Poco/PipeImpl_DUMMY.h
+++ /dev/null
@@ -1,50 +0,0 @@
-//
-// PipeImpl_DUMMY.h
-//
-// Library: Foundation
-// Package: Processes
-// Module: PipeImpl
-//
-// Definition of the PipeImpl_DUMMY class.
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_PipeImpl_DUMMY_INCLUDED
-#define Foundation_PipeImpl_DUMMY_INCLUDED
-
-
-#include "Poco/Foundation.h"
-#include "Poco/RefCountedObject.h"
-
-
-namespace Poco
-{
-
-
-class Foundation_API PipeImpl : public RefCountedObject
-/// A dummy implementation of PipeImpl for platforms
-/// that do not support pipes.
-{
-public:
-    typedef int Handle;
-
-    PipeImpl();
-    ~PipeImpl();
-    int writeBytes(const void * buffer, int length);
-    int readBytes(void * buffer, int length);
-    Handle readHandle() const;
-    Handle writeHandle() const;
-    void closeRead();
-    void closeWrite();
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_PipeImpl_DUMMY_INCLUDED
diff --git a/base/poco/Foundation/include/Poco/PipeStream.h b/base/poco/Foundation/include/Poco/PipeStream.h
deleted file mode 100644
index a797cade010..00000000000
--- a/base/poco/Foundation/include/Poco/PipeStream.h
+++ /dev/null
@@ -1,121 +0,0 @@
-//
-// PipeStream.h
-//
-// Library: Foundation
-// Package: Processes
-// Module: PipeStream
-//
-// Definition of the PipeStream class.
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_PipeStream_INCLUDED
-#define Foundation_PipeStream_INCLUDED
-
-
-#include <istream>
-#include <ostream>
-#include "Poco/BufferedStreamBuf.h"
-#include "Poco/Foundation.h"
-#include "Poco/Pipe.h"
-
-
-namespace Poco
-{
-
-
-class Foundation_API PipeStreamBuf : public BufferedStreamBuf
-/// This is the streambuf class used for reading from and writing to a Pipe.
-{
-public:
-    typedef BufferedStreamBuf::openmode openmode;
-
-    PipeStreamBuf(const Pipe & pipe, openmode mode);
-    /// Creates a PipeStreamBuf with the given Pipe.
-
-    ~PipeStreamBuf();
-    /// Destroys the PipeStreamBuf.
-
-    void close();
-    /// Closes the pipe.
-
-protected:
-    int readFromDevice(char * buffer, std::streamsize length);
-    int writeToDevice(const char * buffer, std::streamsize length);
-
-private:
-    enum
-    {
-        STREAM_BUFFER_SIZE = 1024
-    };
-
-    Pipe _pipe;
-};
-
-
-class Foundation_API PipeIOS : public virtual std::ios
-/// The base class for PipeInputStream and
-/// PipeOutputStream.
-///
-/// This class is needed to ensure the correct initialization
-/// order of the stream buffer and base classes.
-{
-public:
-    PipeIOS(const Pipe & pipe, openmode mode);
-    /// Creates the PipeIOS with the given Pipe.
-
-    ~PipeIOS();
-    /// Destroys the PipeIOS.
-    ///
-    /// Flushes the buffer, but does not close the pipe.
-
-    PipeStreamBuf * rdbuf();
-    /// Returns a pointer to the internal PipeStreamBuf.
-
-    void close();
-    /// Flushes the stream and closes the pipe.
-
-protected:
-    PipeStreamBuf _buf;
-};
-
-
-class Foundation_API PipeOutputStream : public PipeIOS, public std::ostream
-/// An output stream for writing to a Pipe.
-{
-public:
-    PipeOutputStream(const Pipe & pipe);
-    /// Creates the PipeOutputStream with the given Pipe.
-
-    ~PipeOutputStream();
-    /// Destroys the PipeOutputStream.
-    ///
-    /// Flushes the buffer, but does not close the pipe.
-};
-
-
-class Foundation_API PipeInputStream : public PipeIOS, public std::istream
-/// An input stream for reading from a Pipe.
-///
-/// Using formatted input from a PipeInputStream
-/// is not recommended, due to the read-ahead behavior of
-/// istream with formatted reads.
-{
-public:
-    PipeInputStream(const Pipe & pipe);
-    /// Creates the PipeInputStream with the given Pipe.
-
-    ~PipeInputStream();
-    /// Destroys the PipeInputStream.
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_PipeStream_INCLUDED
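A sketch of the common use of PipeInputStream, reading a child process's output through a Pipe; the command is arbitrary:

#include <string>
#include <vector>
#include "Poco/PipeStream.h"
#include "Poco/Process.h"

void captureChildOutput()
{
    Poco::Pipe outPipe;
    std::vector<std::string> args;
    Poco::ProcessHandle ph = Poco::Process::launch("/bin/ls", args, 0, &outPipe, 0);

    Poco::PipeInputStream istr(outPipe); // reads the child's stdout
    std::string line;
    while (std::getline(istr, line)) // unformatted line reads, per the note above
    {
        // process one line of output
    }
    ph.wait();
}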
diff --git a/base/poco/Foundation/include/Poco/SharedMemory_DUMMY.h b/base/poco/Foundation/include/Poco/SharedMemory_DUMMY.h
deleted file mode 100644
index da5269dabad..00000000000
--- a/base/poco/Foundation/include/Poco/SharedMemory_DUMMY.h
+++ /dev/null
@@ -1,89 +0,0 @@
-//
-// SharedMemoryImpl.h
-//
-// Library: Foundation
-// Package: Processes
-// Module: SharedMemoryImpl
-//
-// Definition of the SharedMemoryImpl class.
-//
-// Copyright (c) 2007, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_SharedMemoryImpl_INCLUDED
-#define Foundation_SharedMemoryImpl_INCLUDED
-
-
-#include "Poco/Foundation.h"
-#include "Poco/RefCountedObject.h"
-#include "Poco/SharedMemory.h"
-
-
-namespace Poco
-{
-
-
-class Foundation_API SharedMemoryImpl : public RefCountedObject
-/// A dummy implementation of shared memory, for systems
-/// that do not have shared memory support.
-{
-public:
-    SharedMemoryImpl(const std::string & id, std::size_t size, SharedMemory::AccessMode mode, const void * addr, bool server);
-    /// Creates or connects to a shared memory object with the given name.
-    ///
-    /// For maximum portability, name should be a valid Unix filename and not
-    /// contain any slashes or backslashes.
-    ///
-    /// An address hint can be passed to the system, specifying the desired
-    /// start address of the shared memory area. Whether the hint
-    /// is actually honored is, however, up to the system. Windows platform
-    /// will generally ignore the hint.
-
-    SharedMemoryImpl(const Poco::File & aFile, SharedMemory::AccessMode mode, const void * addr);
-    /// Maps the entire contents of file into a shared memory segment.
-    ///
-    /// An address hint can be passed to the system, specifying the desired
-    /// start address of the shared memory area. Whether the hint
-    /// is actually honored is, however, up to the system. Windows platform
-    /// will generally ignore the hint.
-
-    char * begin() const;
-    /// Returns the start address of the shared memory segment.
-
-    char * end() const;
-    /// Returns the one-past-end end address of the shared memory segment.
-
-protected:
-    ~SharedMemoryImpl();
-    /// Destroys the SharedMemoryImpl.
-
-private:
-    SharedMemoryImpl();
-    SharedMemoryImpl(const SharedMemoryImpl &);
-    SharedMemoryImpl & operator=(const SharedMemoryImpl &);
-};
-
-
-//
-// inlines
-//
-inline char * SharedMemoryImpl::begin() const
-{
-    return 0;
-}
-
-
-inline char * SharedMemoryImpl::end() const
-{
-    return 0;
-}
-
-
-} // namespace Poco
-
-
-#endif // Foundation_SharedMemoryImpl_INCLUDED
diff --git a/base/poco/Foundation/include/Poco/SimpleHashTable.h b/base/poco/Foundation/include/Poco/SimpleHashTable.h
deleted file mode 100644
index b0fcd99aafa..00000000000
--- a/base/poco/Foundation/include/Poco/SimpleHashTable.h
+++ /dev/null
@@ -1,387 +0,0 @@
-//
-// SimpleHashTable.h
-//
-// Library: Foundation
-// Package: Hashing
-// Module: SimpleHashTable
-//
-// Definition of the SimpleHashTable class.
-//
-// Copyright (c) 2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_SimpleHashTable_INCLUDED
-#define Foundation_SimpleHashTable_INCLUDED
-
-
-#include <algorithm>
-#include <cstddef>
-#include <map>
-#include <vector>
-#include "Poco/Exception.h"
-#include "Poco/Foundation.h"
-#include "Poco/HashFunction.h"
-#include "Poco/HashStatistic.h"
-
-
-namespace Poco
-{
-
-
-//@ deprecated
-template <class Key, class Value, class KeyHashFunction = HashFunction<Key>>
-class SimpleHashTable
-/// A SimpleHashTable stores a key value pair that can be looked up via a hashed key.
-///
-/// In comparison to a HashTable, this class handles collisions by sequentially searching the next
-/// free location. This also means that the maximum size of this table is limited: if the hash table
-/// is full, an exception is thrown. This class does not support remove operations.
-/// On the plus side it is faster than the HashTable.
-///
-/// This class is NOT thread safe.
-{
-public:
-    class HashEntry
-    {
-    public:
-        Key key;
-        Value value;
-        HashEntry(const Key k, const Value v) : key(k), value(v) { }
-    };
-
-    typedef std::vector<HashEntry *> HashTableVector;
-
-    SimpleHashTable(UInt32 capacity = 251) : _entries(capacity, 0), _size(0), _capacity(capacity)
-    /// Creates the SimpleHashTable.
-    {
-    }
-
-    SimpleHashTable(const SimpleHashTable & ht) : _size(ht._size), _capacity(ht._capacity)
-    {
-        _entries.reserve(ht._capacity);
-        for (typename HashTableVector::const_iterator it = ht._entries.begin(); it != ht._entries.end(); ++it)
-        {
-            if (*it)
-                _entries.push_back(new HashEntry(**it));
-            else
-                _entries.push_back(0);
-        }
-    }
-
-    ~SimpleHashTable()
-    /// Destroys the SimpleHashTable.
-    {
-        clear();
-    }
-
-    SimpleHashTable & operator=(const SimpleHashTable & ht)
-    {
-        if (this != &ht)
-        {
-            SimpleHashTable tmp(ht);
-            swap(tmp);
-        }
-        return *this;
-    }
-
-    void swap(SimpleHashTable & ht)
-    {
-        using std::swap;
-        swap(_entries, ht._entries);
-        swap(_size, ht._size);
-        swap(_capacity, ht._capacity);
-    }
-
-    void clear()
-    {
-        for (typename HashTableVector::iterator it = _entries.begin(); it != _entries.end(); ++it)
-        {
-            delete *it;
-            *it = 0;
-        }
-        _size = 0;
-    }
-
-    UInt32 insert(const Key & key, const Value & value)
-    /// Returns the hash value of the inserted item.
-    /// Throws an exception if the entry was already inserted
-    {
-        UInt32 hsh = hash(key);
-        insertRaw(key, hsh, value);
-        return hsh;
-    }
-
-    Value & insertRaw(const Key & key, UInt32 hsh, const Value & value)
-    /// Returns the hash value of the inserted item.
-    /// Throws an exception if the entry was already inserted
-    {
-        UInt32 pos = hsh;
-        if (!_entries[pos])
-            _entries[pos] = new HashEntry(key, value);
-        else
-        {
-            UInt32 origHash = hsh;
-            while (_entries[hsh % _capacity])
-            {
-                if (_entries[hsh % _capacity]->key == key)
-                    throw ExistsException();
-                if (hsh - origHash > _capacity)
-                    throw PoolOverflowException("SimpleHashTable full");
-                hsh++;
-            }
-            pos = hsh % _capacity;
-            _entries[pos] = new HashEntry(key, value);
-        }
-        _size++;
-        return _entries[pos]->value;
-    }
-
-    UInt32 update(const Key & key, const Value & value)
-    /// Returns the hash value of the inserted item.
-    /// Replaces an existing entry if it finds one
-    {
-        UInt32 hsh = hash(key);
-        updateRaw(key, hsh, value);
-        return hsh;
-    }
-
-    void updateRaw(const Key & key, UInt32 hsh, const Value & value)
-    /// Returns the hash value of the inserted item.
-    /// Replaces an existing entry if it finds one
-    {
-        if (!_entries[hsh])
-            _entries[hsh] = new HashEntry(key, value);
-        else
-        {
-            UInt32 origHash = hsh;
-            while (_entries[hsh % _capacity])
-            {
-                if (_entries[hsh % _capacity]->key == key)
-                {
-                    _entries[hsh % _capacity]->value = value;
-                    return;
-                }
-                if (hsh - origHash > _capacity)
-                    throw PoolOverflowException("SimpleHashTable full");
-                hsh++;
-            }
-            _entries[hsh % _capacity] = new HashEntry(key, value);
-        }
-        _size++;
-    }
-
-    UInt32 hash(const Key & key) const { return _hash(key, _capacity); }
-
-    const Value & get(const Key & key) const
-    /// Throws an exception if the value does not exist
-    {
-        UInt32 hsh = hash(key);
-        return getRaw(key, hsh);
-    }
-
-    const Value & getRaw(const Key & key, UInt32 hsh) const
-    /// Throws an exception if the value does not exist
-    {
-        UInt32 origHash = hsh;
-        while (true)
-        {
-            if (_entries[hsh % _capacity])
-            {
-                if (_entries[hsh % _capacity]->key == key)
-                {
-                    return _entries[hsh % _capacity]->value;
-                }
-            }
-            else
-                throw InvalidArgumentException("value not found");
-            if (hsh - origHash > _capacity)
-                throw InvalidArgumentException("value not found");
-            hsh++;
-        }
-    }
-
-    Value & get(const Key & key)
-    /// Throws an exception if the value does not exist
-    {
-        UInt32 hsh = hash(key);
-        return const_cast<Value &>(getRaw(key, hsh));
-    }
-
-    const Value & operator[](const Key & key) const { return get(key); }
-
-    Value & operator[](const Key & key)
-    {
-        UInt32 hsh = hash(key);
-        UInt32 origHash = hsh;
-        while (true)
-        {
-            if (_entries[hsh % _capacity])
-            {
-                if (_entries[hsh % _capacity]->key == key)
-                {
-                    return _entries[hsh % _capacity]->value;
-                }
-            }
-            else
-                return insertRaw(key, hsh, Value());
-            if (hsh - origHash > _capacity)
-                return insertRaw(key, hsh, Value());
-            hsh++;
-        }
-    }
-
-    const Key & getKeyRaw(const Key & key, UInt32 hsh)
-    /// Throws an exception if the key does not exist. Returns a reference to the internally
-    /// stored key. Useful when someone does an insert and wants for performance reason only to store
-    /// a pointer to the key in another collection
-    {
-        UInt32 origHash = hsh;
-        while (true)
-        {
-            if (_entries[hsh % _capacity])
-            {
-                if (_entries[hsh % _capacity]->key == key)
-                {
-                    return _entries[hsh % _capacity]->key;
-                }
-            }
-            else
-                throw InvalidArgumentException("key not found");
-
-            if (hsh - origHash > _capacity)
-                throw InvalidArgumentException("key not found");
-            hsh++;
-        }
-    }
-
-    bool get(const Key & key, Value & v) const
-    /// Sets v to the found value, returns false if no value was found
-    {
-        UInt32 hsh = hash(key);
-        return getRaw(key, hsh, v);
-    }
-
-    bool getRaw(const Key & key, UInt32 hsh, Value & v) const
-    /// Sets v to the found value, returns false if no value was found
-    {
-        UInt32 origHash = hsh;
-        while (true)
-        {
-            if (_entries[hsh % _capacity])
-            {
-                if (_entries[hsh % _capacity]->key == key)
-                {
-                    v = _entries[hsh % _capacity]->value;
-                    return true;
-                }
-            }
-            else
-                return false;
-            if (hsh - origHash > _capacity)
-                return false;
-            hsh++;
-        }
-    }
-
-    bool exists(const Key & key) const
-    {
-        UInt32 hsh = hash(key);
-        return existsRaw(key, hsh);
-    }
-
-    bool existsRaw(const Key & key, UInt32 hsh) const
-    {
-        UInt32 origHash = hsh;
-        while (true)
-        {
-            if (_entries[hsh % _capacity])
-            {
-                if (_entries[hsh % _capacity]->key == key)
-                {
-                    return true;
-                }
-            }
-            else
-                return false;
-            if (hsh - origHash > _capacity)
-                return false;
-            hsh++;
-        }
-    }
-
-    std::size_t size() const
-    /// Returns the number of elements already inserted into the SimpleHashTable
-    {
-        return _size;
-    }
-
-    UInt32 capacity() const { return _capacity; }
-
-    void resize(UInt32 newSize)
-    /// Resizes the hashtable, rehashes all existing entries. Expensive!
-    {
-        if (_capacity != newSize)
-        {
-            SimpleHashTable tmp(newSize);
-            swap(tmp);
-            for (typename HashTableVector::const_iterator it = tmp._entries.begin(); it != tmp._entries.end(); ++it)
-            {
-                if (*it)
-                {
-                    insertRaw((*it)->key, hash((*it)->key), (*it)->value);
-                }
-            }
-        }
-    }
-
-    HashStatistic currentState(bool details = false) const
-    /// Returns the current internal state
-    {
-        UInt32 numberOfEntries = (UInt32)_size;
-        UInt32 numZeroEntries = 0;
-        UInt32 maxEntriesPerHash = 0;
-        std::vector<UInt32> detailedEntriesPerHash;
-#ifdef _DEBUG
-        UInt32 totalSize = 0;
-#endif
-        for (UInt32 i = 0; i < _capacity; ++i)
-        {
-            if (_entries[i])
-            {
-                maxEntriesPerHash = 1;
-                UInt32 size = 1;
-                if (details)
-                    detailedEntriesPerHash.push_back(size);
-#ifdef _DEBUG
-                totalSize += size;
-#endif
-            }
-            else
-            {
-                numZeroEntries++;
-                if (details)
-                    detailedEntriesPerHash.push_back(0);
-            }
-        }
-#ifdef _DEBUG
-        poco_assert_dbg(totalSize == numberOfEntries);
-#endif
-        return HashStatistic(_capacity, numberOfEntries, numZeroEntries, maxEntriesPerHash, detailedEntriesPerHash);
-    }
-
-private:
-    HashTableVector _entries;
-    std::size_t _size;
-    UInt32 _capacity;
-    KeyHashFunction _hash;
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_SimpleHashTable_INCLUDED
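A small sketch of the open-addressing behavior documented above, again assuming the default HashFunction supports std::string keys:

#include <string>
#include "Poco/SimpleHashTable.h"

void simpleHashTableDemo()
{
    // Collisions probe forward from the hash slot; a full table throws
    // PoolOverflowException, and there is no remove(), as documented above.
    Poco::SimpleHashTable<std::string, int> table(64);
    table.insert("one", 1);  // throws ExistsException for a duplicate key
    table.update("one", 11); // replaces the value in place
    int v = 0;
    if (table.get("one", v)) // non-throwing lookup variant
    {
        // v == 11
    }
}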
diff --git a/base/poco/Foundation/include/Poco/StreamTokenizer.h b/base/poco/Foundation/include/Poco/StreamTokenizer.h
deleted file mode 100644
index bb08e71f81a..00000000000
--- a/base/poco/Foundation/include/Poco/StreamTokenizer.h
+++ /dev/null
@@ -1,98 +0,0 @@
-//
-// StreamTokenizer.h
-//
-// Library: Foundation
-// Package: Streams
-// Module: StreamTokenizer
-//
-// Definition of the StreamTokenizer class.
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_StreamTokenizer_INCLUDED
-#define Foundation_StreamTokenizer_INCLUDED
-
-
-#include <istream>
-#include <vector>
-#include "Poco/Foundation.h"
-#include "Poco/Token.h"
-
-
-namespace Poco
-{
-
-
-class Foundation_API StreamTokenizer
-/// A stream tokenizer splits an input stream
-/// into a sequence of tokens of different kinds.
-/// Various token kinds can be registered with
-/// the tokenizer.
-{
-public:
-    StreamTokenizer();
-    /// Creates a StreamTokenizer with no attached stream.
-
-    StreamTokenizer(std::istream & istr);
-    /// Creates a StreamTokenizer and attaches it to the given input stream.
-
-    virtual ~StreamTokenizer();
-    /// Destroys the StreamTokenizer and deletes all
-    /// registered tokens.
-
-    void attachToStream(std::istream & istr);
-    /// Attaches the tokenizer to an input stream.
-
-    void addToken(Token * pToken);
-    /// Adds a token class to the tokenizer. The
-    /// tokenizer takes ownership of the token and
-    /// deletes it when no longer needed. Comment
-    /// and whitespace tokens will be marked as
-    /// ignorable, which means that next() will not
-    /// return them.
-
-    void addToken(Token * pToken, bool ignore);
-    /// Adds a token class to the tokenizer. The
-    /// tokenizer takes ownership of the token and
-    /// deletes it when no longer needed.
-    /// If ignore is true, the token will be marked
-    /// as ignorable, which means that next() will
-    /// not return it.
-
-    const Token * next();
-    /// Extracts the next token from the input stream.
-    /// Returns a pointer to an EOFToken if there are
-    /// no more characters to read.
-    /// Returns a pointer to an InvalidToken if an
-    /// invalid character is encountered.
-    /// If a token is marked as ignorable, it will not
-    /// be returned, and the next token will be
-    /// examined.
-    /// Never returns a NULL pointer.
-    /// You must not delete the token returned by next().
-
-private:
-    struct TokenInfo
-    {
-        Token * pToken;
-        bool ignore;
-    };
-
-    typedef std::vector<TokenInfo> TokenVec;
-
-    TokenVec _tokens;
-    std::istream * _pIstr;
-    InvalidToken _invalidToken;
-    EOFToken _eofToken;
-};
-
-
-} // namespace Poco
-
-
-#endif // Foundation_StreamTokenizer_INCLUDED
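A hedged sketch of registering a custom token kind; NumberToken is hypothetical, and the overridden virtuals and the protected _value buffer follow the Token interface in "Poco/Token.h":

#include <sstream>
#include "Poco/StreamTokenizer.h"
#include "Poco/Token.h"

class NumberToken : public Poco::Token // hypothetical token kind: runs of digits
{
public:
    Class tokenClass() const override { return Poco::Token::INTEGER_LITERAL_TOKEN; }
    bool start(char c, std::istream &) override
    {
        if (c < '0' || c > '9')
            return false;
        _value = c; // _value is the protected token buffer of Poco::Token
        return true;
    }
    void finish(std::istream & istr) override
    {
        while (istr.peek() >= '0' && istr.peek() <= '9')
            _value += (char)istr.get();
    }
};

void tokenizeDemo()
{
    std::istringstream source("123456");
    Poco::StreamTokenizer tokenizer(source);
    tokenizer.addToken(new NumberToken); // the tokenizer takes ownership
    const Poco::Token * t = tokenizer.next(); // never returns a null pointer
    while (t->tokenClass() != Poco::Token::EOF_TOKEN)
        t = tokenizer.next();
}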
diff --git a/base/poco/Foundation/include/Poco/SynchronizedObject.h b/base/poco/Foundation/include/Poco/SynchronizedObject.h
deleted file mode 100644
index f0d4311b107..00000000000
--- a/base/poco/Foundation/include/Poco/SynchronizedObject.h
+++ /dev/null
@@ -1,132 +0,0 @@
-//
-// SynchronizedObject.h
-//
-// Library: Foundation
-// Package: Threading
-// Module: SynchronizedObject
-//
-// Definition of the SynchronizedObject class.
-//
-// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_SynchronizedObject_INCLUDED
-#define Foundation_SynchronizedObject_INCLUDED
-
-
-#include "Poco/Event.h"
-#include "Poco/Foundation.h"
-#include "Poco/Mutex.h"
-
-
-namespace Poco
-{
-
-
-class Foundation_API SynchronizedObject
-/// This class aggregates a Mutex and an Event
-/// and can act as a base class for all objects
-/// requiring synchronization in a multithreaded
-/// scenario.
-{
-public:
-    typedef Poco::ScopedLock<SynchronizedObject> ScopedLock;
-
-    SynchronizedObject();
-    /// Creates the object.
-
-    virtual ~SynchronizedObject();
-    /// Destroys the object.
-
-    void lock() const;
-    /// Locks the object. Blocks if the object
-    /// is locked by another thread.
-
-    bool tryLock() const;
-    /// Tries to lock the object. Returns false immediately
-    /// if the object is already locked by another thread.
-    /// Returns true if the object was successfully locked.
-
-    void unlock() const;
-    /// Unlocks the object so that it can be locked by
-    /// other threads.
-
-    void notify() const;
-    /// Signals the object.
-    /// Exactly one thread waiting for the object
-    /// can resume execution.
-
-    void wait() const;
-    /// Waits for the object to become signalled.
-
-    void wait(long milliseconds) const;
-    /// Waits for the object to become signalled.
-    /// Throws a TimeoutException if the object
-    /// does not become signalled within the specified
-    /// time interval.
-
-    bool tryWait(long milliseconds) const;
-    /// Waits for the object to become signalled.
-    /// Returns true if the object
-    /// became signalled within the specified
-    /// time interval, false otherwise.
-
-private:
-    mutable Mutex _mutex;
-    mutable Event _event;
-};
-
-
-//
-// inlines
-//
-inline void SynchronizedObject::lock() const
-{
-    _mutex.lock();
-}
-
-
-inline bool SynchronizedObject::tryLock() const
-{
-    return _mutex.tryLock();
-}
-
-
-inline void SynchronizedObject::unlock() const
-{
-    _mutex.unlock();
-}
-
-
-inline void SynchronizedObject::notify() const
-{
-    _event.set();
-}
-
-
-inline void SynchronizedObject::wait() const
-{
-    _event.wait();
-}
-
-
-inline void SynchronizedObject::wait(long milliseconds) const
-{
-    _event.wait(milliseconds);
-}
-
-
-inline bool SynchronizedObject::tryWait(long milliseconds) const
-{
-    return _event.tryWait(milliseconds);
-}
-
-
-} // namespace Poco
-
-
-#endif // Foundation_SynchronizedObject_INCLUDED
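A sketch of the intended base-class use, combining the aggregated mutex (via the inherited ScopedLock typedef) with the event's notify/wait pair; SharedState is hypothetical:

#include "Poco/SynchronizedObject.h"

class SharedState : public Poco::SynchronizedObject
{
public:
    void publish(int value)
    {
        ScopedLock lock(*this); // locks the aggregated Mutex
        _value = value;
        notify(); // wakes exactly one thread blocked in wait()
    }

    int consume()
    {
        wait(); // blocks until publish() signals the event
        ScopedLock lock(*this);
        return _value;
    }

private:
    int _value = 0;
};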
diff --git a/base/poco/Foundation/include/Poco/UnWindows.h b/base/poco/Foundation/include/Poco/UnWindows.h
deleted file mode 100644
index 1f3835b8af5..00000000000
--- a/base/poco/Foundation/include/Poco/UnWindows.h
+++ /dev/null
@@ -1,135 +0,0 @@
-//
-// UnWindows.h
-//
-// Library: Foundation
-// Package: Core
-// Module: UnWindows
-//
-// A wrapper around the <windows.h> header file that #undef's some
-// of the macros for function names defined by <windows.h> that
-// are a frequent source of conflicts (e.g., GetUserName).
-//
-// Remember that most of the WIN32 API functions come in two variants,
-// a Unicode variant (e.g., GetUserNameW) and an ASCII variant (GetUserNameA).
-// There is also a macro (GetUserName) that's either defined to be the Unicode
-// name or the ASCII name, depending on whether the UNICODE macro is #define'd
-// or not. POCO always calls the Unicode or ASCII functions directly (depending
-// on whether POCO_WIN32_UTF8 is #define'd or not), so the macros are not needed.
-//
-// These macro definitions are a frequent case of problems and naming conflicts,
-// especially for C++ programmers. Say, you define a class with a member function named
-// GetUserName. Depending on whether "Poco/UnWindows.h" has been included by a particular
-// translation unit or not, this might be changed to GetUserNameA/GetUserNameW, or not.
-// While, due to naming conventions used, this is less of a problem in POCO, some
-// of the users of POCO might use a different naming convention where this can become
-// a problem.
-//
-// To disable the #undef's, compile POCO with the POCO_NO_UNWINDOWS macro #define'd.
-//
-// Copyright (c) 2007, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Foundation_UnWindows_INCLUDED
-#define Foundation_UnWindows_INCLUDED
-
-
-// Reduce bloat
-
-
-// Microsoft Visual C++ includes copies of the Windows header files
-// that were current at the time Visual C++ was released.
-// The Windows header files use macros to indicate which versions
-// of Windows support many programming elements. Therefore, you must
-// define these macros to use new functionality introduced in each
-// major operating system release. (Individual header files may use
-// different macros; therefore, if compilation problems occur, check
-// the header file that contains the definition for conditional
-// definitions.) For more information, see SdkDdkVer.h.
-
-
-#    if defined(_WIN32_WINNT)
-#        if (_WIN32_WINNT < 0x0502)
-#            error Unsupported Windows version.
-#        endif
-#    elif defined(NTDDI_VERSION)
-#        if (NTDDI_VERSION < 0x05020000)
-#            error Unsupported Windows version.
-#        endif
-#    elif !defined(_WIN32_WINNT)
-// Define minimum supported version.
-// This can be changed, if needed.
-// If allowed (see POCO_MIN_WINDOWS_OS_SUPPORT
-// below), Platform_WIN32.h will do its
-// best to determine the appropriate values
-// and may redefine these. See Platform_WIN32.h
-// for details.
-#        define _WIN32_WINNT 0x0502
-#        define NTDDI_VERSION 0x05020000
-#    endif
-
-
-// To prevent Platform_WIN32.h from modifying the version defines,
-// uncomment this, otherwise versions will be automatically
-// discovered in Platform_WIN32.h.
-// #define POCO_FORCE_MIN_WINDOWS_OS_SUPPORT
-
-
-#include <windows.h>
-
-
-#if !defined(POCO_NO_UNWINDOWS)
-// A list of annoying macros to #undef.
-// Extend as required.
-#    undef GetBinaryType
-#    undef GetShortPathName
-#    undef GetLongPathName
-#    undef GetEnvironmentStrings
-#    undef SetEnvironmentStrings
-#    undef FreeEnvironmentStrings
-#    undef FormatMessage
-#    undef EncryptFile
-#    undef DecryptFile
-#    undef CreateMutex
-#    undef OpenMutex
-#    undef CreateEvent
-#    undef OpenEvent
-#    undef CreateSemaphore
-#    undef OpenSemaphore
-#    undef LoadLibrary
-#    undef GetModuleFileName
-#    undef CreateProcess
-#    undef GetCommandLine
-#    undef GetEnvironmentVariable
-#    undef SetEnvironmentVariable
-#    undef ExpandEnvironmentStrings
-#    undef OutputDebugString
-#    undef FindResource
-#    undef UpdateResource
-#    undef FindAtom
-#    undef AddAtom
-#    undef GetSystemDirectory
-#    undef GetTempPath
-#    undef GetTempFileName
-#    undef SetCurrentDirectory
-#    undef GetCurrentDirectory
-#    undef CreateDirectory
-#    undef RemoveDirectory
-#    undef CreateFile
-#    undef DeleteFile
-#    undef SearchPath
-#    undef CopyFile
-#    undef MoveFile
-#    undef ReplaceFile
-#    undef GetComputerName
-#    undef SetComputerName
-#    undef GetUserName
-#    undef LogonUser
-#    undef GetVersion
-#    undef GetObject
-#endif // POCO_NO_UNWINDOWS
-
-#endif // Foundation_UnWindows_INCLUDED
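A Windows-only illustration of the macro conflict the header above solves; Session is hypothetical:

#include <string>
#include "Poco/UnWindows.h" // instead of including <windows.h> directly

class Session
{
public:
    // With a plain <windows.h> include, this declaration would silently become
    // GetUserNameA() or GetUserNameW(), because GetUserName is a macro there;
    // the #undef above keeps the member's name stable in every translation unit.
    std::string GetUserName() const { return _user; }

private:
    std::string _user;
};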
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Windows1250Encoding_INCLUDED -#define Foundation_Windows1250Encoding_INCLUDED - - -#include "Poco/Foundation.h" -#include "Poco/TextEncoding.h" - - -namespace Poco -{ - - -class Foundation_API Windows1250Encoding : public TextEncoding -/// Windows Codepage 1250 text encoding. -/// Based on: http://msdn.microsoft.com/en-us/goglobal/cc305143 -{ -public: - Windows1250Encoding(); - ~Windows1250Encoding(); - const char * canonicalName() const; - bool isA(const std::string & encodingName) const; - const CharacterMap & characterMap() const; - int convert(const unsigned char * bytes) const; - int convert(int ch, unsigned char * bytes, int length) const; - int queryConvert(const unsigned char * bytes, int length) const; - int sequenceLength(const unsigned char * bytes, int length) const; - -private: - static const char * _names[]; - static const CharacterMap _charMap; -}; - - -} // namespace Poco - - -#endif // Foundation_Windows1250Encoding_INCLUDED diff --git a/base/poco/Foundation/include/Poco/Windows1251Encoding.h b/base/poco/Foundation/include/Poco/Windows1251Encoding.h deleted file mode 100644 index f2fe483a134..00000000000 --- a/base/poco/Foundation/include/Poco/Windows1251Encoding.h +++ /dev/null @@ -1,53 +0,0 @@ -// -// Windows1251Encoding.h -// -// Library: Foundation -// Package: Text -// Module: Windows1251Encoding -// -// Definition of the Windows1251Encoding class. -// -// Copyright (c) 2005-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Windows1251Encoding_INCLUDED -#define Foundation_Windows1251Encoding_INCLUDED - - -#include "Poco/Foundation.h" -#include "Poco/TextEncoding.h" - - -namespace Poco -{ - - -class Foundation_API Windows1251Encoding : public TextEncoding -/// Windows Codepage 1251 text encoding. -/// Based on: http://msdn.microsoft.com/en-us/goglobal/cc305144 -{ -public: - Windows1251Encoding(); - ~Windows1251Encoding(); - const char * canonicalName() const; - bool isA(const std::string & encodingName) const; - const CharacterMap & characterMap() const; - int convert(const unsigned char * bytes) const; - int convert(int ch, unsigned char * bytes, int length) const; - int queryConvert(const unsigned char * bytes, int length) const; - int sequenceLength(const unsigned char * bytes, int length) const; - -private: - static const char * _names[]; - static const CharacterMap _charMap; -}; - - -} // namespace Poco - - -#endif // Foundation_Windows1251Encoding_INCLUDED diff --git a/base/poco/Foundation/include/Poco/Windows1252Encoding.h b/base/poco/Foundation/include/Poco/Windows1252Encoding.h deleted file mode 100644 index c1af357650b..00000000000 --- a/base/poco/Foundation/include/Poco/Windows1252Encoding.h +++ /dev/null @@ -1,52 +0,0 @@ -// -// Windows1252Encoding.h -// -// Library: Foundation -// Package: Text -// Module: Windows1252Encoding -// -// Definition of the Windows1252Encoding class. -// -// Copyright (c) 2005-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_Windows1252Encoding_INCLUDED -#define Foundation_Windows1252Encoding_INCLUDED - - -#include "Poco/Foundation.h" -#include "Poco/TextEncoding.h" - - -namespace Poco -{ - - -class Foundation_API Windows1252Encoding : public TextEncoding -/// Windows Codepage 1252 text encoding. 
-{ -public: - Windows1252Encoding(); - ~Windows1252Encoding(); - const char * canonicalName() const; - bool isA(const std::string & encodingName) const; - const CharacterMap & characterMap() const; - int convert(const unsigned char * bytes) const; - int convert(int ch, unsigned char * bytes, int length) const; - int queryConvert(const unsigned char * bytes, int length) const; - int sequenceLength(const unsigned char * bytes, int length) const; - -private: - static const char * _names[]; - static const CharacterMap _charMap; -}; - - -} // namespace Poco - - -#endif // Foundation_Windows1252Encoding_INCLUDED diff --git a/base/poco/Foundation/include/Poco/WindowsConsoleChannel.h b/base/poco/Foundation/include/Poco/WindowsConsoleChannel.h deleted file mode 100644 index 0473d030930..00000000000 --- a/base/poco/Foundation/include/Poco/WindowsConsoleChannel.h +++ /dev/null @@ -1,184 +0,0 @@ -// -// WindowsConsoleChannel.h -// -// Library: Foundation -// Package: Logging -// Module: WindowsConsoleChannel -// -// Definition of the WindowsConsoleChannel class. -// -// Copyright (c) 2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_WindowsConsoleChannel_INCLUDED -#define Foundation_WindowsConsoleChannel_INCLUDED - - -#include "Poco/Channel.h" -#include "Poco/Foundation.h" -#include "Poco/Mutex.h" -#include "Poco/UnWindows.h" - - -namespace Poco -{ - - -class Foundation_API WindowsConsoleChannel : public Channel -/// A channel that writes to the Windows console. -/// -/// Only the message's text is written, followed -/// by a newline. -/// -/// If POCO has been compiled with POCO_WIN32_UTF8, -/// log messages are assumed to be UTF-8 encoded, and -/// are converted to UTF-16 prior to writing them to the -/// console. This is the main difference to the ConsoleChannel -/// class, which cannot handle UTF-8 encoded messages on Windows. -/// -/// Chain this channel to a FormattingChannel with an -/// appropriate Formatter to control what is contained -/// in the text. -/// -/// Only available on Windows platforms. -{ -public: - WindowsConsoleChannel(); - /// Creates the WindowsConsoleChannel. - - void log(const Message & msg); - /// Logs the given message to the channel's stream. - -protected: - ~WindowsConsoleChannel(); - -private: - HANDLE _hConsole; - bool _isFile; -}; - - -class Foundation_API WindowsColorConsoleChannel : public Channel -/// A channel that writes to the Windows console. -/// -/// Only the message's text is written, followed -/// by a newline. -/// -/// If POCO has been compiled with POCO_WIN32_UTF8, -/// log messages are assumed to be UTF-8 encoded, and -/// are converted to UTF-16 prior to writing them to the -/// console. This is the main difference to the ConsoleChannel -/// class, which cannot handle UTF-8 encoded messages on Windows. -/// -/// Messages can be colored depending on priority. -/// -/// To enable message coloring, set the "enableColors" -/// property to true (default). 
Furthermore, colors can be -/// configured by setting the following properties -/// (default values are given in parentheses): -/// -/// * traceColor (gray) -/// * debugColor (gray) -/// * informationColor (default) -/// * noticeColor (default) -/// * warningColor (yellow) -/// * errorColor (lightRed) -/// * criticalColor (lightRed) -/// * fatalColor (lightRed) -/// -/// The following color values are supported: -/// -/// * default -/// * black -/// * red -/// * green -/// * brown -/// * blue -/// * magenta -/// * cyan -/// * gray -/// * darkgray -/// * lightRed -/// * lightGreen -/// * yellow -/// * lightBlue -/// * lightMagenta -/// * lightCyan -/// * white -/// -/// Chain this channel to a FormattingChannel with an -/// appropriate Formatter to control what is contained -/// in the text. -/// -/// Only available on Windows platforms. -{ -public: - WindowsColorConsoleChannel(); - /// Creates the WindowsColorConsoleChannel. - - void log(const Message & msg); - /// Logs the given message to the channel's stream. - - void setProperty(const std::string & name, const std::string & value); - /// Sets the property with the given name. - /// - /// The following properties are supported: - /// * enableColors: Enable or disable colors. - /// * traceColor: Specify color for trace messages. - /// * debugColor: Specify color for debug messages. - /// * informationColor: Specify color for information messages. - /// * noticeColor: Specify color for notice messages. - /// * warningColor: Specify color for warning messages. - /// * errorColor: Specify color for error messages. - /// * criticalColor: Specify color for critical messages. - /// * fatalColor: Specify color for fatal messages. - /// - /// See the class documentation for a list of supported color values. - - std::string getProperty(const std::string & name) const; - /// Returns the value of the property with the given name. - /// See setProperty() for a description of the supported - /// properties. - -protected: - enum Color - { - CC_BLACK = 0x0000, - CC_RED = 0x0004, - CC_GREEN = 0x0002, - CC_BROWN = 0x0006, - CC_BLUE = 0x0001, - CC_MAGENTA = 0x0005, - CC_CYAN = 0x0003, - CC_GRAY = 0x0007, - CC_DARKGRAY = 0x0008, - CC_LIGHTRED = 0x000C, - CC_LIGHTGREEN = 0x000A, - CC_YELLOW = 0x000E, - CC_LIGHTBLUE = 0x0009, - CC_LIGHTMAGENTA = 0x000D, - CC_LIGHTCYAN = 0x000B, - CC_WHITE = 0x000F - }; - - ~WindowsColorConsoleChannel(); - WORD parseColor(const std::string & color) const; - std::string formatColor(WORD color) const; - void initColors(); - -private: - bool _enableColors; - HANDLE _hConsole; - bool _isFile; - WORD _colors[9]; -}; - - -} // namespace Poco - - -#endif // Foundation_WindowsConsoleChannel_INCLUDED diff --git a/base/poco/Foundation/src/Base32Decoder.cpp b/base/poco/Foundation/src/Base32Decoder.cpp deleted file mode 100644 index ba9f9db5958..00000000000 --- a/base/poco/Foundation/src/Base32Decoder.cpp +++ /dev/null @@ -1,160 +0,0 @@ -// -// Base32Decoder.cpp -// -// Library: Foundation -// Package: Streams -// Module: Base32 -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors.
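Tying the property list above together, a minimal configuration sketch (Windows-only; property names and color values exactly as documented above, the message text is arbitrary):

#include "Poco/AutoPtr.h"
#include "Poco/Logger.h"
#include "Poco/WindowsConsoleChannel.h"

int main()
{
    Poco::AutoPtr<Poco::WindowsColorConsoleChannel> pChannel(new Poco::WindowsColorConsoleChannel);
    pChannel->setProperty("enableColors", "true");          // on by default
    pChannel->setProperty("warningColor", "lightMagenta");  // override the yellow default
    Poco::Logger::root().setChannel(pChannel);
    Poco::Logger::root().warning("rendered in light magenta");
    return 0;
}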
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Base32Decoder.h" -#include "Poco/Base32Encoder.h" -#include "Poco/Exception.h" -#include "Poco/Mutex.h" -#include <cstring> - - -namespace Poco { - - -unsigned char Base32DecoderBuf::IN_ENCODING[256]; -bool Base32DecoderBuf::IN_ENCODING_INIT = false; - - -namespace -{ - static FastMutex mutex; -} - - -Base32DecoderBuf::Base32DecoderBuf(std::istream& istr): - _groupLength(0), - _groupIndex(0), - _buf(*istr.rdbuf()) -{ - FastMutex::ScopedLock lock(mutex); - if (!IN_ENCODING_INIT) - { - for (unsigned i = 0; i < sizeof(IN_ENCODING); i++) - { - IN_ENCODING[i] = 0xFF; - } - for (unsigned i = 0; i < sizeof(Base32EncoderBuf::OUT_ENCODING); i++) - { - IN_ENCODING[Base32EncoderBuf::OUT_ENCODING[i]] = i; - } - IN_ENCODING[static_cast<unsigned char>('=')] = '\0'; - IN_ENCODING_INIT = true; - } -} - - -Base32DecoderBuf::~Base32DecoderBuf() -{ -} - - -int Base32DecoderBuf::readFromDevice() -{ - if (_groupIndex < _groupLength) - { - return _group[_groupIndex++]; - } - else - { - unsigned char buffer[8]; - std::memset(buffer, '=', sizeof(buffer)); - int c; - - // per RFC-4648, Section 6, permissible block lengths are: - // 2, 4, 5, 7, and 8 bytes. Any other length is malformed. - // - do { - if ((c = readOne()) == -1) return -1; - buffer[0] = (unsigned char) c; - if (IN_ENCODING[buffer[0]] == 0xFF) throw DataFormatException(); - if ((c = readOne()) == -1) throw DataFormatException(); - buffer[1] = (unsigned char) c; - if (IN_ENCODING[buffer[1]] == 0xFF) throw DataFormatException(); - if ((c = readOne()) == -1) break; - buffer[2] = (unsigned char) c; - if (IN_ENCODING[buffer[2]] == 0xFF) throw DataFormatException(); - if ((c = readOne()) == -1) throw DataFormatException(); - buffer[3] = (unsigned char) c; - if (IN_ENCODING[buffer[3]] == 0xFF) throw DataFormatException(); - if ((c = readOne()) == -1) break; - buffer[4] = (unsigned char) c; - if (IN_ENCODING[buffer[4]] == 0xFF) throw DataFormatException(); - if ((c = readOne()) == -1) break; - buffer[5] = (unsigned char) c; - if (IN_ENCODING[buffer[5]] == 0xFF) throw DataFormatException(); - if ((c = readOne()) == -1) throw DataFormatException(); - buffer[6] = (unsigned char) c; - if (IN_ENCODING[buffer[6]] == 0xFF) throw DataFormatException(); - if ((c = readOne()) == -1) break; - buffer[7] = (unsigned char) c; - if (IN_ENCODING[buffer[7]] == 0xFF) throw DataFormatException(); - } while (false); - - _group[0] = (IN_ENCODING[buffer[0]] << 3) | (IN_ENCODING[buffer[1]] >> 2); - _group[1] = ((IN_ENCODING[buffer[1]] & 0x03) << 6) | (IN_ENCODING[buffer[2]] << 1) | (IN_ENCODING[buffer[3]] >> 4); - _group[2] = ((IN_ENCODING[buffer[3]] & 0x0F) << 4) | (IN_ENCODING[buffer[4]] >> 1); - _group[3] = ((IN_ENCODING[buffer[4]] & 0x01) << 7) | (IN_ENCODING[buffer[5]] << 2) | (IN_ENCODING[buffer[6]] >> 3); - _group[4] = ((IN_ENCODING[buffer[6]] & 0x07) << 5) | IN_ENCODING[buffer[7]]; - - if (buffer[2] == '=') - _groupLength = 1; - else if (buffer[4] == '=') - _groupLength = 2; - else if (buffer[5] == '=') - _groupLength = 3; - else if (buffer[7] == '=') - _groupLength = 4; - else - _groupLength = 5; - _groupIndex = 1; - return _group[0]; - } -} - - -int Base32DecoderBuf::readOne() -{ - int ch = _buf.sbumpc(); - return ch; -} - - -Base32DecoderIOS::Base32DecoderIOS(std::istream& istr): _buf(istr) -{ - poco_ios_init(&_buf); -} - - -Base32DecoderIOS::~Base32DecoderIOS() -{ -} - - -Base32DecoderBuf* Base32DecoderIOS::rdbuf() -{ - return &_buf; -} - - -Base32Decoder::Base32Decoder(std::istream& istr): Base32DecoderIOS(istr),
std::istream(&_buf) -{ -} - - -Base32Decoder::~Base32Decoder() -{ -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Base32Encoder.cpp b/base/poco/Foundation/src/Base32Encoder.cpp deleted file mode 100644 index dbac2c977f8..00000000000 --- a/base/poco/Foundation/src/Base32Encoder.cpp +++ /dev/null @@ -1,202 +0,0 @@ -// -// Base32Encoder.cpp -// -// Library: Foundation -// Package: Streams -// Module: Base32 -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Base32Encoder.h" - - -namespace Poco { - - -const unsigned char Base32EncoderBuf::OUT_ENCODING[32] = -{ - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', - 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', - 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', - 'Y', 'Z', '2', '3', '4', '5', '6', '7', -}; - - -Base32EncoderBuf::Base32EncoderBuf(std::ostream& ostr, bool padding): - _groupLength(0), - _buf(*ostr.rdbuf()), - _doPadding(padding) -{ -} - - -Base32EncoderBuf::~Base32EncoderBuf() -{ - try - { - close(); - } - catch (...) - { - } -} - - - -int Base32EncoderBuf::writeToDevice(char c) -{ - static const int eof = std::char_traits<char>::eof(); - - _group[_groupLength++] = (unsigned char) c; - if (_groupLength == 5) - { - unsigned char idx; - idx = _group[0] >> 3; - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[0] & 0x07) << 2) | (_group[1] >> 6); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[1] & 0x3E) >> 1); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[1] & 0x01) << 4) | (_group[2] >> 4); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[2] & 0x0F) << 1) | (_group[3] >> 7); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[3] & 0x7C) >> 2); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[3] & 0x03) << 3) | (_group[4] >> 5); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = (_group[4] & 0x1F); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - _groupLength = 0; - } - return charToInt(c); -} - - -int Base32EncoderBuf::close() -{ - static const int eof = std::char_traits<char>::eof(); - - if (sync() == eof) return eof; - if (_groupLength == 1) - { - _group[1] = 0; - unsigned char idx; - idx = _group[0] >> 3; - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[0] & 0x07) << 2); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - if (_doPadding) { - if (_buf.sputc('=') == eof) return eof; - if (_buf.sputc('=') == eof) return eof; - if (_buf.sputc('=') == eof) return eof; - if (_buf.sputc('=') == eof) return eof; - if (_buf.sputc('=') == eof) return eof; - if (_buf.sputc('=') == eof) return eof; - } - } - else if (_groupLength == 2) - { - _group[2] = 0; - unsigned char idx; - idx = _group[0] >> 3; - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[0] & 0x07) << 2) | (_group[1] >> 6); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[1] & 0x3E) >> 1); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[1] & 0x01) << 4); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - if (_doPadding) { - if (_buf.sputc('=') == eof) return eof; - if (_buf.sputc('=') == eof) return eof; - if (_buf.sputc('=') == eof) return eof; - if (_buf.sputc('=') == eof) return eof; - } - } - else if (_groupLength == 3) - { - _group[3] = 0; - unsigned char idx; - idx = _group[0] >> 3; - if
(_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[0] & 0x07) << 2) | (_group[1] >> 6); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[1] & 0x3E) >> 1); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[1] & 0x01) << 4) | (_group[2] >> 4); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[2] & 0x0F) << 1); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - if (_doPadding) { - if (_buf.sputc('=') == eof) return eof; - if (_buf.sputc('=') == eof) return eof; - if (_buf.sputc('=') == eof) return eof; - } - } - else if (_groupLength == 4) - { - _group[4] = 0; - unsigned char idx; - idx = _group[0] >> 3; - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[0] & 0x07) << 2) | (_group[1] >> 6); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[1] & 0x3E) >> 1); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[1] & 0x01) << 4) | (_group[2] >> 4); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[2] & 0x0F) << 1) | (_group[3] >> 7); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[3] & 0x7C) >> 2); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - idx = ((_group[3] & 0x03) << 3); - if (_buf.sputc(OUT_ENCODING[idx]) == eof) return eof; - if (_doPadding && _buf.sputc('=') == eof) return eof; - } - _groupLength = 0; - return _buf.pubsync(); -} - - -Base32EncoderIOS::Base32EncoderIOS(std::ostream& ostr, bool padding): - _buf(ostr, padding) -{ - poco_ios_init(&_buf); -} - - -Base32EncoderIOS::~Base32EncoderIOS() -{ -} - - -int Base32EncoderIOS::close() -{ - return _buf.close(); -} - - -Base32EncoderBuf* Base32EncoderIOS::rdbuf() -{ - return &_buf; -} - - -Base32Encoder::Base32Encoder(std::ostream& ostr, bool padding): - Base32EncoderIOS(ostr, padding), std::ostream(&_buf) -{ -} - - -Base32Encoder::~Base32Encoder() -{ -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/EventLogChannel.cpp b/base/poco/Foundation/src/EventLogChannel.cpp deleted file mode 100644 index ec35a9b5d28..00000000000 --- a/base/poco/Foundation/src/EventLogChannel.cpp +++ /dev/null @@ -1,221 +0,0 @@ -// -// EventLogChannel.cpp -// -// Library: Foundation -// Package: Logging -// Module: EventLogChannel -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/EventLogChannel.h" -#include "Poco/Message.h" -#include "Poco/String.h" -#include "pocomsg.h" - - -namespace Poco { - - -const std::string EventLogChannel::PROP_NAME = "name"; -const std::string EventLogChannel::PROP_HOST = "host"; -const std::string EventLogChannel::PROP_LOGHOST = "loghost"; -const std::string EventLogChannel::PROP_LOGFILE = "logfile"; - - -EventLogChannel::EventLogChannel(): - _logFile("Application"), - _h(0) -{ - const DWORD maxPathLen = MAX_PATH + 1; - char name[maxPathLen]; - int n = GetModuleFileNameA(NULL, name, maxPathLen); - if (n > 0) - { - char* end = name + n - 1; - while (end > name && *end != '\\') --end; - if (*end == '\\') ++end; - _name = end; - } -} - - -EventLogChannel::EventLogChannel(const std::string& name): - _name(name), - _logFile("Application"), - _h(0) -{ -} - - -EventLogChannel::EventLogChannel(const std::string& name, const std::string& host): - _name(name), - _host(host), - _logFile("Application"), - _h(0) -{ -} - - -EventLogChannel::~EventLogChannel() -{ - try - { - close(); - } - catch (...) 
- { - poco_unexpected(); -} - - -void EventLogChannel::open() -{ - setUpRegistry(); - _h = RegisterEventSource(_host.empty() ? NULL : _host.c_str(), _name.c_str()); - if (!_h) throw SystemException("cannot register event source"); -} - - -void EventLogChannel::close() -{ - if (_h) DeregisterEventSource(_h); - _h = 0; -} - - -void EventLogChannel::log(const Message& msg) -{ - if (!_h) open(); - const char* pMsg = msg.getText().c_str(); - ReportEvent(_h, getType(msg), getCategory(msg), POCO_MSG_LOG, NULL, 1, 0, &pMsg, NULL); -} - - -void EventLogChannel::setProperty(const std::string& name, const std::string& value) -{ - if (icompare(name, PROP_NAME) == 0) - _name = value; - else if (icompare(name, PROP_HOST) == 0) - _host = value; - else if (icompare(name, PROP_LOGHOST) == 0) - _host = value; - else if (icompare(name, PROP_LOGFILE) == 0) - _logFile = value; - else - Channel::setProperty(name, value); -} - - -std::string EventLogChannel::getProperty(const std::string& name) const -{ - if (icompare(name, PROP_NAME) == 0) - return _name; - else if (icompare(name, PROP_HOST) == 0) - return _host; - else if (icompare(name, PROP_LOGHOST) == 0) - return _host; - else if (icompare(name, PROP_LOGFILE) == 0) - return _logFile; - else - return Channel::getProperty(name); -} - - -int EventLogChannel::getType(const Message& msg) -{ - switch (msg.getPriority()) - { - case Message::PRIO_TRACE: - case Message::PRIO_DEBUG: - case Message::PRIO_INFORMATION: - return EVENTLOG_INFORMATION_TYPE; - case Message::PRIO_NOTICE: - case Message::PRIO_WARNING: - return EVENTLOG_WARNING_TYPE; - default: - return EVENTLOG_ERROR_TYPE; - } -} - - -int EventLogChannel::getCategory(const Message& msg) -{ - switch (msg.getPriority()) - { - case Message::PRIO_TRACE: - return POCO_CTG_TRACE; - case Message::PRIO_DEBUG: - return POCO_CTG_DEBUG; - case Message::PRIO_INFORMATION: - return POCO_CTG_INFORMATION; - case Message::PRIO_NOTICE: - return POCO_CTG_NOTICE; - case Message::PRIO_WARNING: - return POCO_CTG_WARNING; - case Message::PRIO_ERROR: - return POCO_CTG_ERROR; - case Message::PRIO_CRITICAL: - return POCO_CTG_CRITICAL; - case Message::PRIO_FATAL: - return POCO_CTG_FATAL; - default: - return 0; - } -} - - -void EventLogChannel::setUpRegistry() const -{ - std::string key = "SYSTEM\\CurrentControlSet\\Services\\EventLog\\"; - key.append(_logFile); - key.append("\\"); - key.append(_name); - HKEY hKey; - DWORD disp; - DWORD rc = RegCreateKeyEx(HKEY_LOCAL_MACHINE, key.c_str(), 0, NULL, REG_OPTION_NON_VOLATILE, KEY_ALL_ACCESS, NULL, &hKey, &disp); - if (rc != ERROR_SUCCESS) return; - - if (disp == REG_CREATED_NEW_KEY) - { - std::string path; - - if (path.empty()) - path = findLibrary("PocoMsg.dll"); - - if (!path.empty()) - { - DWORD count = 8; - DWORD types = 7; - RegSetValueEx(hKey, "CategoryMessageFile", 0, REG_SZ, (const BYTE*) path.c_str(), static_cast<DWORD>(path.size() + 1)); - RegSetValueEx(hKey, "EventMessageFile", 0, REG_SZ, (const BYTE*) path.c_str(), static_cast<DWORD>(path.size() + 1)); - RegSetValueEx(hKey, "CategoryCount", 0, REG_DWORD, (const BYTE*) &count, static_cast<DWORD>(sizeof(count))); - RegSetValueEx(hKey, "TypesSupported", 0, REG_DWORD, (const BYTE*) &types, static_cast<DWORD>(sizeof(types))); - } - } - RegCloseKey(hKey); -} - - -std::string EventLogChannel::findLibrary(const char* name) -{ - std::string path; - HMODULE dll = LoadLibraryA(name); - if (dll) - { - const DWORD maxPathLen = MAX_PATH + 1; - char name[maxPathLen]; - int n = GetModuleFileNameA(dll, name, maxPathLen); - if (n > 0) path = name; - FreeLibrary(dll); -
} - return path; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/FPEnvironment_DEC.cpp b/base/poco/Foundation/src/FPEnvironment_DEC.cpp deleted file mode 100644 index b5995f83bf5..00000000000 --- a/base/poco/Foundation/src/FPEnvironment_DEC.cpp +++ /dev/null @@ -1,144 +0,0 @@ -// -// FPEnvironment_DEC.cpp -// -// Library: Foundation -// Package: Core -// Module: FPEnvironment -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -// -// _XOPEN_SOURCE disables the ieee fp functions -// in <math.h>, therefore we undefine it for this file. -// -#undef _XOPEN_SOURCE - - -#include <math.h> -#include <fp.h> -#include <fp_class.h> -#include "Poco/FPEnvironment_DEC.h" - - -namespace Poco { - - -FPEnvironmentImpl::FPEnvironmentImpl() -{ - _env = ieee_get_fp_control(); -} - - -FPEnvironmentImpl::FPEnvironmentImpl(const FPEnvironmentImpl& env) -{ - _env = env._env; -} - - -FPEnvironmentImpl::~FPEnvironmentImpl() -{ - ieee_set_fp_control(_env); -} - - -FPEnvironmentImpl& FPEnvironmentImpl::operator = (const FPEnvironmentImpl& env) -{ - _env = env._env; - return *this; -} - - -bool FPEnvironmentImpl::isInfiniteImpl(float value) -{ - int cls = fp_classf(value); - return cls == FP_POS_INF || cls == FP_NEG_INF; -} - - -bool FPEnvironmentImpl::isInfiniteImpl(double value) -{ - int cls = fp_class(value); - return cls == FP_POS_INF || cls == FP_NEG_INF; -} - - -bool FPEnvironmentImpl::isInfiniteImpl(long double value) -{ - int cls = fp_classl(value); - return cls == FP_POS_INF || cls == FP_NEG_INF; -} - - -bool FPEnvironmentImpl::isNaNImpl(float value) -{ - return isnanf(value) != 0; -} - - -bool FPEnvironmentImpl::isNaNImpl(double value) -{ - return isnan(value) != 0; -} - - -bool FPEnvironmentImpl::isNaNImpl(long double value) -{ - return isnanl(value) != 0; -} - - -float FPEnvironmentImpl::copySignImpl(float target, float source) -{ - return copysignf(target, source); -} - - -double FPEnvironmentImpl::copySignImpl(double target, double source) -{ - return copysign(target, source); -} - - -long double FPEnvironmentImpl::copySignImpl(long double target, long double source) -{ - return copysignl(target, source); -} - - -void FPEnvironmentImpl::keepCurrentImpl() -{ - ieee_set_fp_control(_env); -} - - -void FPEnvironmentImpl::clearFlagsImpl() -{ - ieee_set_fp_control(0); -} - - -bool FPEnvironmentImpl::isFlagImpl(FlagImpl flag) -{ - return (ieee_get_fp_control() & flag) != 0; -} - - -void FPEnvironmentImpl::setRoundingModeImpl(RoundingModeImpl mode) -{ - // not supported -} - - -FPEnvironmentImpl::RoundingModeImpl FPEnvironmentImpl::getRoundingModeImpl() -{ - // not supported - return FPEnvironmentImpl::RoundingModeImpl(0); -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/FPEnvironment_DUMMY.cpp b/base/poco/Foundation/src/FPEnvironment_DUMMY.cpp deleted file mode 100644 index b473d0dfb93..00000000000 --- a/base/poco/Foundation/src/FPEnvironment_DUMMY.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// -// FPEnvironment_DUMMY.cpp -// -// Library: Foundation -// Package: Core -// Module: FPEnvironment -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors.
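These per-platform backends sit behind the portable Poco::FPEnvironment front end, which also gives them their RAII shape: the constructor snapshots the floating-point state and the destructor restores it. A hedged sketch of the classification and copySign entry points:

#include <iostream>
#include <limits>
#include "Poco/FPEnvironment.h"

int main()
{
    const double inf = std::numeric_limits<double>::infinity();
    const double nan = std::numeric_limits<double>::quiet_NaN();
    std::cout << Poco::FPEnvironment::isInfinite(inf) << '\n';     // 1
    std::cout << Poco::FPEnvironment::isNaN(nan) << '\n';          // 1
    std::cout << Poco::FPEnvironment::copySign(3.0, -1.0) << '\n'; // -3
}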
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/FPEnvironment_DUMMY.h" - - -namespace Poco { - - -FPEnvironmentImpl::RoundingModeImpl FPEnvironmentImpl::_roundingMode; - - -FPEnvironmentImpl::FPEnvironmentImpl() -{ -} - - -FPEnvironmentImpl::FPEnvironmentImpl(const FPEnvironmentImpl& env) -{ -} - - -FPEnvironmentImpl::~FPEnvironmentImpl() -{ -} - - -FPEnvironmentImpl& FPEnvironmentImpl::operator = (const FPEnvironmentImpl& env) -{ - return *this; -} - - -void FPEnvironmentImpl::keepCurrentImpl() -{ -} - - -void FPEnvironmentImpl::clearFlagsImpl() -{ -} - - -bool FPEnvironmentImpl::isFlagImpl(FlagImpl flag) -{ - return false; -} - - -void FPEnvironmentImpl::setRoundingModeImpl(RoundingModeImpl mode) -{ - _roundingMode = mode; -} - - -FPEnvironmentImpl::RoundingModeImpl FPEnvironmentImpl::getRoundingModeImpl() -{ - return _roundingMode; -} - - -long double FPEnvironmentImpl::copySignImpl(long double target, long double source) -{ - return (source >= 0 && target >= 0) || (source < 0 && target < 0) ? target : -target; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/FPEnvironment_QNX.cpp b/base/poco/Foundation/src/FPEnvironment_QNX.cpp deleted file mode 100644 index 057eb8eb7bc..00000000000 --- a/base/poco/Foundation/src/FPEnvironment_QNX.cpp +++ /dev/null @@ -1,82 +0,0 @@ -// -// FPEnvironment_QNX.cpp -// -// Library: Foundation -// Package: Core -// Module: FPEnvironment -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/FPEnvironment_QNX.h" - - -namespace Poco { - - -FPEnvironmentImpl::FPEnvironmentImpl() -{ - fegetenv(&_env); -} - - -FPEnvironmentImpl::FPEnvironmentImpl(const FPEnvironmentImpl& env) -{ - _env = env._env; -} - - -FPEnvironmentImpl::~FPEnvironmentImpl() -{ - fesetenv(&_env); -} - - -FPEnvironmentImpl& FPEnvironmentImpl::operator = (const FPEnvironmentImpl& env) -{ - _env = env._env; - return *this; -} - - -void FPEnvironmentImpl::keepCurrentImpl() -{ - fegetenv(&_env); -} - - -void FPEnvironmentImpl::clearFlagsImpl() -{ - feclearexcept(FE_ALL_EXCEPT); -} - - -bool FPEnvironmentImpl::isFlagImpl(FlagImpl flag) -{ - return fetestexcept(flag) != 0; -} - - -void FPEnvironmentImpl::setRoundingModeImpl(RoundingModeImpl mode) -{ - fesetround(mode); -} - - -FPEnvironmentImpl::RoundingModeImpl FPEnvironmentImpl::getRoundingModeImpl() -{ - return (RoundingModeImpl) fegetround(); -} - - -long double FPEnvironmentImpl::copySignImpl(long double target, long double source) -{ - return (source >= 0 && target >= 0) || (source < 0 && target < 0) ? target : -target; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Latin1Encoding.cpp b/base/poco/Foundation/src/Latin1Encoding.cpp deleted file mode 100644 index c5aa00d745a..00000000000 --- a/base/poco/Foundation/src/Latin1Encoding.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// -// Latin1Encoding.cpp -// -// Library: Foundation -// Package: Text -// Module: Latin1Encoding -// -// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. 
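Before the Latin-1 table that follows, it is worth recording how these encoding classes are consumed; a usage sketch through the common TextEncoding interface (ISO-8859-1 maps code points 0-255 to themselves, so the round trip is the identity):

#include <iostream>
#include "Poco/Latin1Encoding.h"

int main()
{
    Poco::Latin1Encoding enc;
    unsigned char buf[4];
    // U+00E9 (e with acute accent) is the single Latin-1 byte 0xE9.
    if (enc.convert(0xE9, buf, 4) == 1)
        std::cout << std::hex << int(buf[0]) << '\n';    // e9
    const unsigned char byte = 0xE9;
    std::cout << std::hex << enc.convert(&byte) << '\n'; // e9
}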
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Latin1Encoding.h" -#include "Poco/String.h" - - -namespace Poco { - - -const char* Latin1Encoding::_names[] = -{ - "ISO-8859-1", - "Latin1", - "Latin-1", - NULL -}; - - -const TextEncoding::CharacterMap Latin1Encoding::_charMap = -{ - /* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */ - /* 00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - /* 10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - /* 20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, - /* 30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, - /* 40 */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, - /* 50 */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, - /* 60 */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, - /* 70 */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, - /* 80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, - /* 90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, - /* a0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - /* b0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, - /* c0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, - /* d0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - /* e0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, - /* f0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, -}; - - -Latin1Encoding::Latin1Encoding() -{ -} - - -Latin1Encoding::~Latin1Encoding() -{ -} - - -const char* Latin1Encoding::canonicalName() const -{ - return _names[0]; -} - - -bool Latin1Encoding::isA(const std::string& encodingName) const -{ - for (const char** name = _names; *name; ++name) - { - if (Poco::icompare(encodingName, *name) == 0) - return true; - } - return false; -} - - -const TextEncoding::CharacterMap& Latin1Encoding::characterMap() const -{ - return _charMap; -} - - -int Latin1Encoding::convert(const unsigned char* bytes) const -{ - return *bytes; -} - - -int Latin1Encoding::convert(int ch, unsigned char* bytes, int length) const -{ - if (ch >= 0 && ch <= 255) - { - if (bytes && length >= 1) - *bytes = (unsigned char) ch; - return 1; - } - else return 0; -} - - -int Latin1Encoding::queryConvert(const unsigned char* bytes, int length) const -{ - if (1 <= length) - return *bytes; - else - return -1; -} - - -int Latin1Encoding::sequenceLength(const unsigned char* bytes, int length) const -{ - return 1; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Latin2Encoding.cpp b/base/poco/Foundation/src/Latin2Encoding.cpp deleted file mode 100644 index a0c77150099..00000000000 --- a/base/poco/Foundation/src/Latin2Encoding.cpp +++ /dev/null @@ -1,179 +0,0 @@ -// -// Latin2Encoding.cpp -// -// Library: Foundation -// Package: Text -// Module: Latin2Encoding -// -// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. 
-// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Latin2Encoding.h" -#include "Poco/String.h" - - -namespace Poco { - - -const char* Latin2Encoding::_names[] = -{ - "ISO-8859-2", - "Latin2", - "Latin-2", - NULL -}; - - -const TextEncoding::CharacterMap Latin2Encoding::_charMap = -{ - /* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */ - /* 00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - /* 10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, - /* 20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - /* 30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - /* 40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, - /* 50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - /* 60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - /* 70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, - /* 80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - /* 90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - /* a0 */ 0x00a0, 0x0104, 0x02d8, 0x0141, 0x00a4, 0x013d, 0x015a, 0x00a7, 0x00a8, 0x0160, 0x015e, 0x0164, 0x0179, 0x00ad, 0x017d, 0x017b, - /* b0 */ 0x00b0, 0x0105, 0x02db, 0x0142, 0x00b4, 0x013e, 0x015b, 0x02c7, 0x00b8, 0x0161, 0x015f, 0x0165, 0x017a, 0x02dd, 0x017e, 0x017c, - /* c0 */ 0x0154, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x0139, 0x0106, 0x00c7, 0x010c, 0x00c9, 0x0118, 0x00cb, 0x011a, 0x00cd, 0x00ce, 0x010e, - /* d0 */ 0x0110, 0x0143, 0x0147, 0x00d3, 0x00d4, 0x0150, 0x00d6, 0x00d7, 0x0158, 0x016e, 0x00da, 0x0170, 0x00dc, 0x00dd, 0x0162, 0x00df, - /* e0 */ 0x0155, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x013a, 0x0107, 0x00e7, 0x010d, 0x00e9, 0x0119, 0x00eb, 0x011b, 0x00ed, 0x00ee, 0x010f, - /* f0 */ 0x0111, 0x0144, 0x0148, 0x00f3, 0x00f4, 0x0151, 0x00f6, 0x00f7, 0x0159, 0x016f, 0x00fa, 0x0171, 0x00fc, 0x00fd, 0x0163, 0x02d9, -}; - - -Latin2Encoding::Latin2Encoding() -{ -} - - -Latin2Encoding::~Latin2Encoding() -{ -} - - -const char* Latin2Encoding::canonicalName() const -{ - return _names[0]; -} - - -bool Latin2Encoding::isA(const std::string& encodingName) const -{ - for (const char** name = _names; *name; ++name) - { - if (Poco::icompare(encodingName, *name) == 0) - return true; - } - return false; -} - - -const TextEncoding::CharacterMap& Latin2Encoding::characterMap() const -{ - return _charMap; -} - - -int Latin2Encoding::convert(const unsigned char* bytes) const -{ - return _charMap[*bytes]; -} - - -int Latin2Encoding::convert(int ch, unsigned char* bytes, int length) const -{ - if (ch >= 0 && ch <= 255 && _charMap[ch] == ch) - { - if (bytes && length >= 1) - *bytes = (unsigned char) ch; - return 1; - } - switch(ch) - { - case 0x0104: if (bytes && length >= 1) *bytes = 0xa1; return 1; - case 0x02d8: if (bytes && length >= 1) *bytes = 0xa2; return 1; - 
case 0x0141: if (bytes && length >= 1) *bytes = 0xa3; return 1; - case 0x013d: if (bytes && length >= 1) *bytes = 0xa5; return 1; - case 0x015a: if (bytes && length >= 1) *bytes = 0xa6; return 1; - case 0x0160: if (bytes && length >= 1) *bytes = 0xa9; return 1; - case 0x015e: if (bytes && length >= 1) *bytes = 0xaa; return 1; - case 0x0164: if (bytes && length >= 1) *bytes = 0xab; return 1; - case 0x0179: if (bytes && length >= 1) *bytes = 0xac; return 1; - case 0x017d: if (bytes && length >= 1) *bytes = 0xae; return 1; - case 0x017b: if (bytes && length >= 1) *bytes = 0xaf; return 1; - case 0x0105: if (bytes && length >= 1) *bytes = 0xb1; return 1; - case 0x02db: if (bytes && length >= 1) *bytes = 0xb2; return 1; - case 0x0142: if (bytes && length >= 1) *bytes = 0xb3; return 1; - case 0x013e: if (bytes && length >= 1) *bytes = 0xb5; return 1; - case 0x015b: if (bytes && length >= 1) *bytes = 0xb6; return 1; - case 0x02c7: if (bytes && length >= 1) *bytes = 0xb7; return 1; - case 0x0161: if (bytes && length >= 1) *bytes = 0xb9; return 1; - case 0x015f: if (bytes && length >= 1) *bytes = 0xba; return 1; - case 0x0165: if (bytes && length >= 1) *bytes = 0xbb; return 1; - case 0x017a: if (bytes && length >= 1) *bytes = 0xbc; return 1; - case 0x02dd: if (bytes && length >= 1) *bytes = 0xbd; return 1; - case 0x017e: if (bytes && length >= 1) *bytes = 0xbe; return 1; - case 0x017c: if (bytes && length >= 1) *bytes = 0xbf; return 1; - case 0x0154: if (bytes && length >= 1) *bytes = 0xc0; return 1; - case 0x0102: if (bytes && length >= 1) *bytes = 0xc3; return 1; - case 0x0139: if (bytes && length >= 1) *bytes = 0xc5; return 1; - case 0x0106: if (bytes && length >= 1) *bytes = 0xc6; return 1; - case 0x010c: if (bytes && length >= 1) *bytes = 0xc8; return 1; - case 0x0118: if (bytes && length >= 1) *bytes = 0xca; return 1; - case 0x011a: if (bytes && length >= 1) *bytes = 0xcc; return 1; - case 0x010e: if (bytes && length >= 1) *bytes = 0xcf; return 1; - case 0x0110: if (bytes && length >= 1) *bytes = 0xd0; return 1; - case 0x0143: if (bytes && length >= 1) *bytes = 0xd1; return 1; - case 0x0147: if (bytes && length >= 1) *bytes = 0xd2; return 1; - case 0x0150: if (bytes && length >= 1) *bytes = 0xd5; return 1; - case 0x0158: if (bytes && length >= 1) *bytes = 0xd8; return 1; - case 0x016e: if (bytes && length >= 1) *bytes = 0xd9; return 1; - case 0x0170: if (bytes && length >= 1) *bytes = 0xdb; return 1; - case 0x0162: if (bytes && length >= 1) *bytes = 0xde; return 1; - case 0x0155: if (bytes && length >= 1) *bytes = 0xe0; return 1; - case 0x0103: if (bytes && length >= 1) *bytes = 0xe3; return 1; - case 0x013a: if (bytes && length >= 1) *bytes = 0xe5; return 1; - case 0x0107: if (bytes && length >= 1) *bytes = 0xe6; return 1; - case 0x010d: if (bytes && length >= 1) *bytes = 0xe8; return 1; - case 0x0119: if (bytes && length >= 1) *bytes = 0xea; return 1; - case 0x011b: if (bytes && length >= 1) *bytes = 0xec; return 1; - case 0x010f: if (bytes && length >= 1) *bytes = 0xef; return 1; - case 0x0111: if (bytes && length >= 1) *bytes = 0xf0; return 1; - case 0x0144: if (bytes && length >= 1) *bytes = 0xf1; return 1; - case 0x0148: if (bytes && length >= 1) *bytes = 0xf2; return 1; - case 0x0151: if (bytes && length >= 1) *bytes = 0xf5; return 1; - case 0x0159: if (bytes && length >= 1) *bytes = 0xf8; return 1; - case 0x016f: if (bytes && length >= 1) *bytes = 0xf9; return 1; - case 0x0171: if (bytes && length >= 1) *bytes = 0xfb; return 1; - case 0x0163: if (bytes && length >= 1) *bytes = 0xfe; 
return 1; - case 0x02d9: if (bytes && length >= 1) *bytes = 0xff; return 1; - default: return 0; - } -} - - -int Latin2Encoding::queryConvert(const unsigned char* bytes, int length) const -{ - if (1 <= length) - return _charMap[*bytes]; - else - return -1; -} - - -int Latin2Encoding::sequenceLength(const unsigned char* bytes, int length) const -{ - return 1; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Latin9Encoding.cpp b/base/poco/Foundation/src/Latin9Encoding.cpp deleted file mode 100644 index eadc71f30e0..00000000000 --- a/base/poco/Foundation/src/Latin9Encoding.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// -// Latin9Encoding.cpp -// -// Library: Foundation -// Package: Text -// Module: Latin9Encoding -// -// Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Latin9Encoding.h" -#include "Poco/String.h" - - -namespace Poco { - - -const char* Latin9Encoding::_names[] = -{ - "ISO-8859-15", - "Latin9", - "Latin-9", - NULL -}; - - -const TextEncoding::CharacterMap Latin9Encoding::_charMap = -{ - /* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */ - /* 00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - /* 10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, - /* 20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - /* 30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - /* 40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, - /* 50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - /* 60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - /* 70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, - /* 80 */ 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, - /* 90 */ 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, - /* a0 */ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20ac, 0x00a5, 0x0160, 0x00a7, 0x0161, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, - /* b0 */ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x017d, 0x00b5, 0x00b6, 0x00b7, 0x017e, 0x00b9, 0x00ba, 0x00bb, 0x0152, 0x0153, 0x0178, 0x00bf, - /* c0 */ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, - /* d0 */ 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, - /* e0 */ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 0x00ef, - /* f0 */ 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, -}; - - -Latin9Encoding::Latin9Encoding() -{ -} - - -Latin9Encoding::~Latin9Encoding() -{ -} - - -const char* Latin9Encoding::canonicalName() 
const -{ - return _names[0]; -} - - -bool Latin9Encoding::isA(const std::string& encodingName) const -{ - for (const char** name = _names; *name; ++name) - { - if (Poco::icompare(encodingName, *name) == 0) - return true; - } - return false; -} - - -const TextEncoding::CharacterMap& Latin9Encoding::characterMap() const -{ - return _charMap; -} - - -int Latin9Encoding::convert(const unsigned char* bytes) const -{ - return _charMap[*bytes]; -} - - -int Latin9Encoding::convert(int ch, unsigned char* bytes, int length) const -{ - if (ch >= 0 && ch <= 255 && _charMap[ch] == ch) - { - if (bytes && length >= 1) - *bytes = ch; - return 1; - } - else switch (ch) - { - case 0x0152: if (bytes && length >= 1) *bytes = 0xbc; return 1; - case 0x0153: if (bytes && length >= 1) *bytes = 0xbd; return 1; - case 0x0160: if (bytes && length >= 1) *bytes = 0xa6; return 1; - case 0x0161: if (bytes && length >= 1) *bytes = 0xa8; return 1; - case 0x017d: if (bytes && length >= 1) *bytes = 0xb4; return 1; - case 0x017e: if (bytes && length >= 1) *bytes = 0xb8; return 1; - case 0x0178: if (bytes && length >= 1) *bytes = 0xbe; return 1; - case 0x20ac: if (bytes && length >= 1) *bytes = 0xa4; return 1; - default: return 0; - } -} - - -int Latin9Encoding::queryConvert(const unsigned char* bytes, int length) const -{ - if (1 <= length) - return _charMap[*bytes]; - else - return -1; -} - - -int Latin9Encoding::sequenceLength(const unsigned char* bytes, int length) const -{ - return 1; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/MD4Engine.cpp b/base/poco/Foundation/src/MD4Engine.cpp deleted file mode 100644 index cab90623185..00000000000 --- a/base/poco/Foundation/src/MD4Engine.cpp +++ /dev/null @@ -1,278 +0,0 @@ -// -// MD4Engine.cpp -// -// Library: Foundation -// Package: Crypt -// Module: MD4Engine -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// -// -// MD4 (RFC 1320) algorithm: -// Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All -// rights reserved. -// -// License to copy and use this software is granted provided that it -// is identified as the "RSA Data Security, Inc. MD4 Message-Digest -// Algorithm" in all material mentioning or referencing this software -// or this function. -// -// License is also granted to make and use derivative works provided -// that such works are identified as "derived from the RSA Data -// Security, Inc. MD4 Message-Digest Algorithm" in all material -// mentioning or referencing the derived work. -// -// RSA Data Security, Inc. makes no representations concerning either -// the merchantability of this software or the suitability of this -// software for any particular purpose. It is provided "as is" -// without express or implied warranty of any kind. -// -// These notices must be retained in any copies of any part of this -// documentation and/or software. 
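As a reference point for the engine below, the RFC 1320 test vectors can be reproduced through the DigestEngine interface; a short sketch:

#include <iostream>
#include "Poco/DigestEngine.h"
#include "Poco/MD4Engine.h"

int main()
{
    Poco::MD4Engine md4;
    md4.update("abc");
    // RFC 1320 appendix: MD4("abc") = a448017aaf21d8525fc10ae87aa6729d
    std::cout << Poco::DigestEngine::digestToHex(md4.digest()) << '\n';
}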
-// - - -#include "Poco/MD4Engine.h" -#include <cstring> - - -namespace Poco { - - -MD4Engine::MD4Engine() -{ - _digest.reserve(16); - reset(); -} - - -MD4Engine::~MD4Engine() -{ - reset(); -} - - -void MD4Engine::updateImpl(const void* input_, std::size_t inputLen) -{ - const unsigned char* input = (const unsigned char*) input_; - unsigned int i, index, partLen; - - /* Compute number of bytes mod 64 */ - index = (unsigned int)((_context.count[0] >> 3) & 0x3F); - - /* Update number of bits */ - if ((_context.count[0] += ((UInt32) inputLen << 3)) < ((UInt32) inputLen << 3)) - _context.count[1]++; - _context.count[1] += ((UInt32) inputLen >> 29); - - partLen = 64 - index; - - /* Transform as many times as possible. */ - if (inputLen >= partLen) - { - std::memcpy(&_context.buffer[index], input, partLen); - transform(_context.state, _context.buffer); - - for (i = partLen; i + 63 < inputLen; i += 64) - transform(_context.state, &input[i]); - - index = 0; - } - else i = 0; - - /* Buffer remaining input */ - std::memcpy(&_context.buffer[index], &input[i], inputLen-i); -} - - -std::size_t MD4Engine::digestLength() const -{ - return DIGEST_SIZE; -} - - -void MD4Engine::reset() -{ - std::memset(&_context, 0, sizeof(_context)); - _context.count[0] = _context.count[1] = 0; - _context.state[0] = 0x67452301; - _context.state[1] = 0xefcdab89; - _context.state[2] = 0x98badcfe; - _context.state[3] = 0x10325476; -} - - -const DigestEngine::Digest& MD4Engine::digest() -{ - static const unsigned char PADDING[64] = - { - 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 - }; - unsigned char bits[8]; - unsigned int index, padLen; - - /* Save number of bits */ - encode(bits, _context.count, 8); - - /* Pad out to 56 mod 64. */ - index = (unsigned int)((_context.count[0] >> 3) & 0x3f); - padLen = (index < 56) ? (56 - index) : (120 - index); - update(PADDING, padLen); - - /* Append length (before padding) */ - update(bits, 8); - - /* Store state in digest */ - unsigned char digest[16]; - encode(digest, _context.state, 16); - _digest.clear(); - _digest.insert(_digest.begin(), digest, digest + sizeof(digest)); - - /* Zeroize sensitive information. */ - std::memset(&_context, 0, sizeof (_context)); - reset(); - return _digest; -} - - -/* Constants for MD4Transform routine. */ -#define S11 3 -#define S12 7 -#define S13 11 -#define S14 19 -#define S21 3 -#define S22 5 -#define S23 9 -#define S24 13 -#define S31 3 -#define S32 9 -#define S33 11 -#define S34 15 - - -/* F, G and H are basic MD4 functions. */ -#define F(x, y, z) (((x) & (y)) | ((~x) & (z))) -#define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) -#define H(x, y, z) ((x) ^ (y) ^ (z)) - - -/* ROTATE_LEFT rotates x left n bits.
*/ -#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n)))) - - -/* FF, GG and HH are transformations for rounds 1, 2 and 3 */ -/* Rotation is separate from addition to prevent recomputation */ -#define FF(a, b, c, d, x, s) { \ - (a) += F ((b), (c), (d)) + (x); \ - (a) = ROTATE_LEFT ((a), (s)); \ - } -#define GG(a, b, c, d, x, s) { \ - (a) += G ((b), (c), (d)) + (x) + (UInt32)0x5a827999; \ - (a) = ROTATE_LEFT ((a), (s)); \ - } -#define HH(a, b, c, d, x, s) { \ - (a) += H ((b), (c), (d)) + (x) + (UInt32)0x6ed9eba1; \ - (a) = ROTATE_LEFT ((a), (s)); \ - } - - -void MD4Engine::transform (UInt32 state[4], const unsigned char block[64]) -{ - UInt32 a = state[0], b = state[1], c = state[2], d = state[3], x[16]; - - decode(x, block, 64); - - /* Round 1 */ - FF (a, b, c, d, x[ 0], S11); /* 1 */ - FF (d, a, b, c, x[ 1], S12); /* 2 */ - FF (c, d, a, b, x[ 2], S13); /* 3 */ - FF (b, c, d, a, x[ 3], S14); /* 4 */ - FF (a, b, c, d, x[ 4], S11); /* 5 */ - FF (d, a, b, c, x[ 5], S12); /* 6 */ - FF (c, d, a, b, x[ 6], S13); /* 7 */ - FF (b, c, d, a, x[ 7], S14); /* 8 */ - FF (a, b, c, d, x[ 8], S11); /* 9 */ - FF (d, a, b, c, x[ 9], S12); /* 10 */ - FF (c, d, a, b, x[10], S13); /* 11 */ - FF (b, c, d, a, x[11], S14); /* 12 */ - FF (a, b, c, d, x[12], S11); /* 13 */ - FF (d, a, b, c, x[13], S12); /* 14 */ - FF (c, d, a, b, x[14], S13); /* 15 */ - FF (b, c, d, a, x[15], S14); /* 16 */ - - /* Round 2 */ - GG (a, b, c, d, x[ 0], S21); /* 17 */ - GG (d, a, b, c, x[ 4], S22); /* 18 */ - GG (c, d, a, b, x[ 8], S23); /* 19 */ - GG (b, c, d, a, x[12], S24); /* 20 */ - GG (a, b, c, d, x[ 1], S21); /* 21 */ - GG (d, a, b, c, x[ 5], S22); /* 22 */ - GG (c, d, a, b, x[ 9], S23); /* 23 */ - GG (b, c, d, a, x[13], S24); /* 24 */ - GG (a, b, c, d, x[ 2], S21); /* 25 */ - GG (d, a, b, c, x[ 6], S22); /* 26 */ - GG (c, d, a, b, x[10], S23); /* 27 */ - GG (b, c, d, a, x[14], S24); /* 28 */ - GG (a, b, c, d, x[ 3], S21); /* 29 */ - GG (d, a, b, c, x[ 7], S22); /* 30 */ - GG (c, d, a, b, x[11], S23); /* 31 */ - GG (b, c, d, a, x[15], S24); /* 32 */ - - /* Round 3 */ - HH (a, b, c, d, x[ 0], S31); /* 33 */ - HH (d, a, b, c, x[ 8], S32); /* 34 */ - HH (c, d, a, b, x[ 4], S33); /* 35 */ - HH (b, c, d, a, x[12], S34); /* 36 */ - HH (a, b, c, d, x[ 2], S31); /* 37 */ - HH (d, a, b, c, x[10], S32); /* 38 */ - HH (c, d, a, b, x[ 6], S33); /* 39 */ - HH (b, c, d, a, x[14], S34); /* 40 */ - HH (a, b, c, d, x[ 1], S31); /* 41 */ - HH (d, a, b, c, x[ 9], S32); /* 42 */ - HH (c, d, a, b, x[ 5], S33); /* 43 */ - HH (b, c, d, a, x[13], S34); /* 44 */ - HH (a, b, c, d, x[ 3], S31); /* 45 */ - HH (d, a, b, c, x[11], S32); /* 46 */ - HH (c, d, a, b, x[ 7], S33); /* 47 */ - HH (b, c, d, a, x[15], S34); /* 48 */ - - state[0] += a; - state[1] += b; - state[2] += c; - state[3] += d; - - /* Zeroize sensitive information. 
*/ - std::memset(x, 0, sizeof(x)); -} - - -void MD4Engine::encode(unsigned char* output, const UInt32* input, std::size_t len) -{ - unsigned int i, j; - - for (i = 0, j = 0; j < len; i++, j += 4) - { - output[j] = (unsigned char)(input[i] & 0xff); - output[j+1] = (unsigned char)((input[i] >> 8) & 0xff); - output[j+2] = (unsigned char)((input[i] >> 16) & 0xff); - output[j+3] = (unsigned char)((input[i] >> 24) & 0xff); - } -} - - -void MD4Engine::decode(UInt32* output, const unsigned char* input, std::size_t len) -{ - unsigned int i, j; - - for (i = 0, j = 0; j < len; i++, j += 4) - output[i] = ((UInt32)input[j]) | (((UInt32)input[j+1]) << 8) | - (((UInt32)input[j+2]) << 16) | (((UInt32)input[j+3]) << 24); -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Manifest.cpp b/base/poco/Foundation/src/Manifest.cpp deleted file mode 100644 index c4e828c5e97..00000000000 --- a/base/poco/Foundation/src/Manifest.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// Manifest.cpp -// -// Library: Foundation -// Package: SharedLibrary -// Module: ClassLoader -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Manifest.h" - - -namespace Poco { - - -ManifestBase::ManifestBase() -{ -} - - -ManifestBase::~ManifestBase() -{ -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/NumericString.cpp b/base/poco/Foundation/src/NumericString.cpp index 90b943015d6..7141d5d5d57 100644 --- a/base/poco/Foundation/src/NumericString.cpp +++ b/base/poco/Foundation/src/NumericString.cpp @@ -14,23 +14,9 @@ #include "Poco/Bugcheck.h" - -// +++ double conversion +++ -#define double_conversion poco_double_conversion // don't collide with standalone double_conversion library -#define UNREACHABLE poco_bugcheck -#define UNIMPLEMENTED poco_bugcheck -#include "diy-fp.cc" -#include "cached-powers.cc" -#include "bignum-dtoa.cc" -#include "bignum.cc" -#include "fast-dtoa.cc" -#include "fixed-dtoa.cc" -#include "strtod.cc" -#include "double-conversion.cc" -// --- double conversion --- +#include <double-conversion/double-conversion.h> #include "Poco/NumericString.h" -poco_static_assert(POCO_MAX_FLT_STRING_LEN == double_conversion::kMaxSignificantDecimalDigits); #include "Poco/String.h" #include #include @@ -263,7 +249,7 @@ float strToFloat(const char* str) int processed; int flags = StringToDoubleConverter::ALLOW_LEADING_SPACES | StringToDoubleConverter::ALLOW_TRAILING_SPACES; - StringToDoubleConverter converter(flags, 0.0, Single::NaN(), POCO_FLT_INF, POCO_FLT_NAN); + StringToDoubleConverter converter(flags, 0.0, std::numeric_limits<float>::quiet_NaN(), POCO_FLT_INF, POCO_FLT_NAN); float result = converter.StringToFloat(str, static_cast<int>(strlen(str)), &processed); return result; } @@ -275,7 +261,7 @@ double strToDouble(const char* str) int processed; int flags = StringToDoubleConverter::ALLOW_LEADING_SPACES | StringToDoubleConverter::ALLOW_TRAILING_SPACES; - StringToDoubleConverter converter(flags, 0.0, Double::NaN(), POCO_FLT_INF, POCO_FLT_NAN); + StringToDoubleConverter converter(flags, 0.0, std::numeric_limits<double>::quiet_NaN(), POCO_FLT_INF, POCO_FLT_NAN); double result = converter.StringToDouble(str, static_cast<int>(strlen(str)), &processed); return result; } diff --git a/base/poco/Foundation/src/PipeImpl_DUMMY.cpp b/base/poco/Foundation/src/PipeImpl_DUMMY.cpp deleted file mode 100644 index b0faf7b68ba..00000000000 --- a/base/poco/Foundation/src/PipeImpl_DUMMY.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// -// PipeImpl_DUMMY.cpp -// -// Library: Foundation -//
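The observable contract of the NumericString change above: the converter's junk-string fallback now comes from std::numeric_limits rather than the removed wrapper classes, so unparsable input still yields a quiet NaN. A sketch, assuming the usual strToDouble declaration from NumericString.h:

#include <cmath>
#include <iostream>
#include "Poco/NumericString.h"

int main()
{
    const double d = Poco::strToDouble("not a number");
    std::cout << std::boolalpha << std::isnan(d) << '\n'; // true
}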
Package: Processes -// Module: PipeImpl -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/PipeImpl_DUMMY.h" - - -namespace Poco { - - -PipeImpl::PipeImpl() -{ -} - - -PipeImpl::~PipeImpl() -{ -} - - -int PipeImpl::writeBytes(const void* buffer, int length) -{ - return 0; -} - - -int PipeImpl::readBytes(void* buffer, int length) -{ - return 0; -} - - -PipeImpl::Handle PipeImpl::readHandle() const -{ - return 0; -} - - -PipeImpl::Handle PipeImpl::writeHandle() const -{ - return 0; -} - - -void PipeImpl::closeRead() -{ -} - - -void PipeImpl::closeWrite() -{ -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/PipeStream.cpp b/base/poco/Foundation/src/PipeStream.cpp deleted file mode 100644 index 96fb323581d..00000000000 --- a/base/poco/Foundation/src/PipeStream.cpp +++ /dev/null @@ -1,127 +0,0 @@ -// -// PipeStream.cpp -// -// Library: Foundation -// Package: Processes -// Module: PipeStream -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/PipeStream.h" - - -namespace Poco { - - -// -// PipeStreamBuf -// - - -PipeStreamBuf::PipeStreamBuf(const Pipe& pipe, openmode mode): - BufferedStreamBuf(STREAM_BUFFER_SIZE, mode), - _pipe(pipe) -{ -} - - -PipeStreamBuf::~PipeStreamBuf() -{ -} - - -int PipeStreamBuf::readFromDevice(char* buffer, std::streamsize length) -{ - return _pipe.readBytes(buffer, (int) length); -} - - -int PipeStreamBuf::writeToDevice(const char* buffer, std::streamsize length) -{ - return _pipe.writeBytes(buffer, (int) length); -} - - -void PipeStreamBuf::close() -{ - _pipe.close(Pipe::CLOSE_BOTH); -} - - -// -// PipeIOS -// - - -PipeIOS::PipeIOS(const Pipe& pipe, openmode mode): - _buf(pipe, mode) -{ - poco_ios_init(&_buf); -} - - -PipeIOS::~PipeIOS() -{ - try - { - _buf.sync(); - } - catch (...) - { - } -} - - -PipeStreamBuf* PipeIOS::rdbuf() -{ - return &_buf; -} - - -void PipeIOS::close() -{ - _buf.sync(); - _buf.close(); -} - - -// -// PipeOutputStream -// - - -PipeOutputStream::PipeOutputStream(const Pipe& pipe): - PipeIOS(pipe, std::ios::out), - std::ostream(&_buf) -{ -} - - -PipeOutputStream::~PipeOutputStream() -{ -} - - -// -// PipeInputStream -// - - -PipeInputStream::PipeInputStream(const Pipe& pipe): - PipeIOS(pipe, std::ios::in), - std::istream(&_buf) -{ -} - - -PipeInputStream::~PipeInputStream() -{ -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Semaphore_VX.cpp b/base/poco/Foundation/src/Semaphore_VX.cpp deleted file mode 100644 index 5bc63b530c7..00000000000 --- a/base/poco/Foundation/src/Semaphore_VX.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// -// Semaphore_VX.cpp -// -// Library: Foundation -// Package: Threading -// Module: Semaphore -// -// Copyright (c) 2004-2011, Applied Informatics Software Engineering GmbH. -// and Contributors. 
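The PipeStream classes removed above wrap a Poco::Pipe in standard iostreams; a minimal single-process loopback sketch of their intended use:

#include <iostream>
#include <string>
#include "Poco/Pipe.h"
#include "Poco/PipeStream.h"

int main()
{
    Poco::Pipe pipe;
    Poco::PipeOutputStream out(pipe);
    Poco::PipeInputStream in(pipe);
    out << "hello" << std::endl;   // std::endl flushes through the pipe buffer
    std::string word;
    in >> word;
    std::cout << word << '\n';     // hello
}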
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Semaphore_VX.h" -#include - - -namespace Poco { - - -SemaphoreImpl::SemaphoreImpl(int n, int max) -{ - poco_assert (n >= 0 && max > 0 && n <= max); - - _sem = semCCreate(SEM_Q_PRIORITY, n); - if (_sem == 0) - throw Poco::SystemException("cannot create semaphore"); -} - - -SemaphoreImpl::~SemaphoreImpl() -{ - semDelete(_sem); -} - - -void SemaphoreImpl::waitImpl() -{ - if (semTake(_sem, WAIT_FOREVER) != OK) - throw SystemException("cannot wait for semaphore"); -} - - -bool SemaphoreImpl::waitImpl(long milliseconds) -{ - int ticks = milliseconds*sysClkRateGet()/1000; - return semTake(_sem, ticks) == OK; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Semaphore_WIN32.cpp b/base/poco/Foundation/src/Semaphore_WIN32.cpp deleted file mode 100644 index 2ec04a8c02d..00000000000 --- a/base/poco/Foundation/src/Semaphore_WIN32.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// -// Semaphore_WIN32.cpp -// -// Library: Foundation -// Package: Threading -// Module: Semaphore -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Semaphore_WIN32.h" - - -namespace Poco { - - -SemaphoreImpl::SemaphoreImpl(int n, int max) -{ - poco_assert (n >= 0 && max > 0 && n <= max); - - _sema = CreateSemaphoreW(NULL, n, max, NULL); - if (!_sema) - { - throw SystemException("cannot create semaphore"); - } -} - - -SemaphoreImpl::~SemaphoreImpl() -{ - CloseHandle(_sema); -} - - -void SemaphoreImpl::waitImpl() -{ - switch (WaitForSingleObject(_sema, INFINITE)) - { - case WAIT_OBJECT_0: - return; - default: - throw SystemException("wait for semaphore failed"); - } -} - - -bool SemaphoreImpl::waitImpl(long milliseconds) -{ - switch (WaitForSingleObject(_sema, milliseconds + 1)) - { - case WAIT_TIMEOUT: - return false; - case WAIT_OBJECT_0: - return true; - default: - throw SystemException("wait for semaphore failed"); - } -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/SharedMemory_DUMMY.cpp b/base/poco/Foundation/src/SharedMemory_DUMMY.cpp deleted file mode 100644 index 38586323806..00000000000 --- a/base/poco/Foundation/src/SharedMemory_DUMMY.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// -// SharedMemoryImpl.cpp -// -// Library: Foundation -// Package: Processes -// Module: SharedMemoryImpl -// -// Copyright (c) 2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/SharedMemory_DUMMY.h" - - -namespace Poco { - - -SharedMemoryImpl::SharedMemoryImpl(const std::string&, std::size_t, SharedMemory::AccessMode, const void*, bool) -{ -} - - -SharedMemoryImpl::SharedMemoryImpl(const Poco::File&, SharedMemory::AccessMode, const void*) -{ -} - - -SharedMemoryImpl::~SharedMemoryImpl() -{ -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/StreamTokenizer.cpp b/base/poco/Foundation/src/StreamTokenizer.cpp deleted file mode 100644 index 1b6e936274e..00000000000 --- a/base/poco/Foundation/src/StreamTokenizer.cpp +++ /dev/null @@ -1,105 +0,0 @@ -// -// StreamTokenizer.cpp -// -// Library: Foundation -// Package: Streams -// Module: StreamTokenizer -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. 
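// The two deleted SemaphoreImpl variants above implement one contract per
// platform: a blocking wait plus a millisecond-bounded wait that reports
// timeout instead of throwing. A sketch of the same contract on top of C++20
// std::counting_semaphore (TimedSemaphore is a hypothetical name, not a Poco
// class):
#include <chrono>
#include <semaphore>

class TimedSemaphore
{
public:
    explicit TimedSemaphore(int n): _sem(n) {}

    void wait() { _sem.acquire(); }    // like waitImpl(): blocks until available

    bool wait(long milliseconds)       // like waitImpl(long): false on timeout
    {
        return _sem.try_acquire_for(std::chrono::milliseconds(milliseconds));
    }

    void set() { _sem.release(); }

private:
    std::counting_semaphore<> _sem;
};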
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/StreamTokenizer.h" - - -namespace Poco { - - -StreamTokenizer::StreamTokenizer(): - _pIstr(0) -{ -} - - -StreamTokenizer::StreamTokenizer(std::istream& istr): - _pIstr(&istr) -{ -} - - -StreamTokenizer::~StreamTokenizer() -{ - for (TokenVec::iterator it = _tokens.begin(); it != _tokens.end(); ++it) - { - delete it->pToken; - } -} - - -void StreamTokenizer::attachToStream(std::istream& istr) -{ - _pIstr = &istr; -} - - -void StreamTokenizer::addToken(Token* pToken) -{ - poco_check_ptr (pToken); - - TokenInfo ti; - ti.pToken = pToken; - ti.ignore = (pToken->tokenClass() == Token::COMMENT_TOKEN || pToken->tokenClass() == Token::WHITESPACE_TOKEN); - _tokens.push_back(ti); -} - - -void StreamTokenizer::addToken(Token* pToken, bool ignore) -{ - poco_check_ptr (pToken); - - TokenInfo ti; - ti.pToken = pToken; - ti.ignore = ignore; - _tokens.push_back(ti); -} - - -const Token* StreamTokenizer::next() -{ - poco_check_ptr (_pIstr); - - static const int eof = std::char_traits<char>::eof(); - - int first = _pIstr->get(); - TokenVec::const_iterator it = _tokens.begin(); - while (first != eof && it != _tokens.end()) - { - const TokenInfo& ti = *it; - if (ti.pToken->start((char) first, *_pIstr)) - { - ti.pToken->finish(*_pIstr); - if (ti.ignore) - { - first = _pIstr->get(); - it = _tokens.begin(); - } - else return ti.pToken; - } - else ++it; - } - if (first == eof) - { - return &_eofToken; - } - else - { - _invalidToken.start((char) first, *_pIstr); - return &_invalidToken; - } -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/SynchronizedObject.cpp b/base/poco/Foundation/src/SynchronizedObject.cpp deleted file mode 100644 index 6a42e6594dc..00000000000 --- a/base/poco/Foundation/src/SynchronizedObject.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// -// SynchronizedObject.cpp -// -// Library: Foundation -// Package: Threading -// Module: SynchronizedObject -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors.
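// The deleted StreamTokenizer above works by delegation: next() reads one
// character, offers it to each registered Token via start(), lets a match
// consume the rest via finish(), and silently restarts on tokens registered
// as ignorable. A hedged usage sketch of that API as shown (WordToken is a
// hypothetical Poco::Token subclass; the whitespace token is registered as
// ignored):
#include <istream>
#include "Poco/StreamTokenizer.h"
#include "Poco/Token.h"

void tokenizeSketch(std::istream& istr, Poco::Token* wordToken, Poco::Token* whitespaceToken)
{
    Poco::StreamTokenizer tokenizer(istr);
    tokenizer.addToken(wordToken);              // tokenizer takes ownership
    tokenizer.addToken(whitespaceToken, true);  // consumed, never returned
    for (const Poco::Token* t = tokenizer.next(); t->tokenClass() != Poco::Token::EOF_TOKEN; t = tokenizer.next())
    {
        // ... use t->tokenString() ...
    }
}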
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/SynchronizedObject.h" - - -namespace Poco { - - -SynchronizedObject::SynchronizedObject() -{ -} - - -SynchronizedObject::~SynchronizedObject() -{ -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/TextEncoding.cpp b/base/poco/Foundation/src/TextEncoding.cpp index cfd1c77ef95..f652d6f5d37 100644 --- a/base/poco/Foundation/src/TextEncoding.cpp +++ b/base/poco/Foundation/src/TextEncoding.cpp @@ -16,15 +16,9 @@ #include "Poco/Exception.h" #include "Poco/String.h" #include "Poco/ASCIIEncoding.h" -#include "Poco/Latin1Encoding.h" -#include "Poco/Latin2Encoding.h" -#include "Poco/Latin9Encoding.h" #include "Poco/UTF32Encoding.h" #include "Poco/UTF16Encoding.h" #include "Poco/UTF8Encoding.h" -#include "Poco/Windows1250Encoding.h" -#include "Poco/Windows1251Encoding.h" -#include "Poco/Windows1252Encoding.h" #include "Poco/RWLock.h" #include "Poco/SingletonHolder.h" #include @@ -47,15 +41,9 @@ public: add(pUtf8Encoding, TextEncoding::GLOBAL); add(new ASCIIEncoding); - add(new Latin1Encoding); - add(new Latin2Encoding); - add(new Latin9Encoding); add(pUtf8Encoding); add(new UTF16Encoding); add(new UTF32Encoding); - add(new Windows1250Encoding); - add(new Windows1251Encoding); - add(new Windows1252Encoding); } ~TextEncodingManager() diff --git a/base/poco/Foundation/src/Timezone_VX.cpp b/base/poco/Foundation/src/Timezone_VX.cpp deleted file mode 100644 index 18339bffab4..00000000000 --- a/base/poco/Foundation/src/Timezone_VX.cpp +++ /dev/null @@ -1,78 +0,0 @@ -// -// Timezone_VXX.cpp -// -// Library: Foundation -// Package: DateTime -// Module: Timezone -// -// Copyright (c) 2004-2011, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Timezone.h" -#include "Poco/Exception.h" -#include "Poco/Environment.h" -#include - - -namespace Poco { - - -int Timezone::utcOffset() -{ - std::time_t now = std::time(NULL); - struct std::tm t; - gmtime_r(&now, &t); - std::time_t utc = std::mktime(&t); - return now - utc; -} - - -int Timezone::dst() -{ - std::time_t now = std::time(NULL); - struct std::tm t; - if (localtime_r(&now, &t) != OK) - throw Poco::SystemException("cannot get local time DST offset"); - return t.tm_isdst == 1 ? 3600 : 0; -} - - -bool Timezone::isDst(const Timestamp& timestamp) -{ - std::time_t time = timestamp.epochTime(); - struct std::tm* tms = std::localtime(&time); - if (!tms) throw Poco::SystemException("cannot get local time DST flag"); - return tms->tm_isdst > 0; -} - - -std::string Timezone::name() -{ - // format of TIMEZONE environment variable: - // name_of_zone:<(unused)>:time_in_minutes_from_UTC:daylight_start:daylight_end - std::string tz = Environment::get("TIMEZONE", "UTC"); - std::string::size_type pos = tz.find(':'); - if (pos != std::string::npos) - return tz.substr(0, pos); - else - return tz; -} - - -std::string Timezone::standardName() -{ - return name(); -} - - -std::string Timezone::dstName() -{ - return name(); -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/Windows1251Encoding.cpp b/base/poco/Foundation/src/Windows1251Encoding.cpp deleted file mode 100644 index 91f1d23a859..00000000000 --- a/base/poco/Foundation/src/Windows1251Encoding.cpp +++ /dev/null @@ -1,237 +0,0 @@ -// -// Windows1251Encoding.cpp -// -// Library: Foundation -// Package: Text -// Module: Windows1251Encoding -// -// Copyright (c) 2005-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. 
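// The deleted Timezone_VX::utcOffset above relies on a classic trick: mktime()
// interprets broken-down time as local time, so feeding it the gmtime_r()
// fields of "now" yields a time_t that differs from "now" by exactly the
// standard UTC offset (DST is accounted for separately by dst()). A portable
// sketch of the same computation, assuming POSIX gmtime_r:
#include <ctime>

int utcOffsetSketch()
{
    std::time_t now = std::time(nullptr);
    std::tm t{};
    gmtime_r(&now, &t);
    std::time_t utcAsLocal = std::mktime(&t);   // UTC fields read as local time
    return static_cast<int>(now - utcAsLocal);  // offset from UTC in seconds
}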
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Windows1251Encoding.h" -#include "Poco/String.h" - - -namespace Poco { - - -const char* Windows1251Encoding::_names[] = -{ - "windows-1251", - "Windows-1251", - "cp1251", - "CP1251", - NULL -}; - - -const TextEncoding::CharacterMap Windows1251Encoding::_charMap = -{ - /* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */ - /* 00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - /* 10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, - /* 20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - /* 30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - /* 40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, - /* 50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - /* 60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - /* 70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, - /* 80 */ 0x0402, 0x0403, 0x201a, 0x0453, 0x201e, 0x2026, 0x2020, 0x2021, 0x20ac, 0x2030, 0x0409, 0x2039, 0x040a, 0x040c, 0x040b, 0x040f, - /* 90 */ 0x0452, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0xfffe, 0x2122, 0x0459, 0x203a, 0x045a, 0x045c, 0x045b, 0x045f, - /* a0 */ 0x00a0, 0x040e, 0x045e, 0x0408, 0x00a4, 0x0490, 0x00a6, 0x00a7, 0x0401, 0x00a9, 0x0404, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x0407, - /* b0 */ 0x00b0, 0x00b1, 0x0406, 0x0456, 0x0491, 0x00b5, 0x00b6, 0x00b7, 0x0451, 0x2116, 0x0454, 0x00bb, 0x0458, 0x0405, 0x0455, 0x0457, - /* c0 */ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f, - /* d0 */ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, - /* e0 */ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043a, 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, - /* f0 */ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, 0x044d, 0x044e, 0x044f, -}; - - -Windows1251Encoding::Windows1251Encoding() -{ -} - - -Windows1251Encoding::~Windows1251Encoding() -{ -} - - -const char* Windows1251Encoding::canonicalName() const -{ - return _names[0]; -} - - -bool Windows1251Encoding::isA(const std::string& encodingName) const -{ - for (const char** name = _names; *name; ++name) - { - if (Poco::icompare(encodingName, *name) == 0) - return true; - } - return false; -} - - -const TextEncoding::CharacterMap& Windows1251Encoding::characterMap() const -{ - return _charMap; -} - - -int Windows1251Encoding::convert(const unsigned char* bytes) const -{ - return _charMap[*bytes]; -} - - -int Windows1251Encoding::convert(int ch, unsigned char* bytes, int length) const -{ - if (ch >= 0 && ch <= 255 && _charMap[ch] == ch) - { - if (bytes && length >= 1) - *bytes = (unsigned char) ch; - return 1; - } - else switch(ch) - { - case 0x0402: if (bytes && length >= 1) *bytes = 0x80; return 1; - case 
0x0403: if (bytes && length >= 1) *bytes = 0x81; return 1; - case 0x201a: if (bytes && length >= 1) *bytes = 0x82; return 1; - case 0x0453: if (bytes && length >= 1) *bytes = 0x83; return 1; - case 0x201e: if (bytes && length >= 1) *bytes = 0x84; return 1; - case 0x2026: if (bytes && length >= 1) *bytes = 0x85; return 1; - case 0x2020: if (bytes && length >= 1) *bytes = 0x86; return 1; - case 0x2021: if (bytes && length >= 1) *bytes = 0x87; return 1; - case 0x20ac: if (bytes && length >= 1) *bytes = 0x88; return 1; - case 0x2030: if (bytes && length >= 1) *bytes = 0x89; return 1; - case 0x0409: if (bytes && length >= 1) *bytes = 0x8a; return 1; - case 0x2039: if (bytes && length >= 1) *bytes = 0x8b; return 1; - case 0x040a: if (bytes && length >= 1) *bytes = 0x8c; return 1; - case 0x040c: if (bytes && length >= 1) *bytes = 0x8d; return 1; - case 0x040b: if (bytes && length >= 1) *bytes = 0x8e; return 1; - case 0x040f: if (bytes && length >= 1) *bytes = 0x8f; return 1; - case 0x0452: if (bytes && length >= 1) *bytes = 0x90; return 1; - case 0x2018: if (bytes && length >= 1) *bytes = 0x91; return 1; - case 0x2019: if (bytes && length >= 1) *bytes = 0x92; return 1; - case 0x201c: if (bytes && length >= 1) *bytes = 0x93; return 1; - case 0x201d: if (bytes && length >= 1) *bytes = 0x94; return 1; - case 0x2022: if (bytes && length >= 1) *bytes = 0x95; return 1; - case 0x2013: if (bytes && length >= 1) *bytes = 0x96; return 1; - case 0x2014: if (bytes && length >= 1) *bytes = 0x97; return 1; - case 0xfffe: if (bytes && length >= 1) *bytes = 0x98; return 1; - case 0x2122: if (bytes && length >= 1) *bytes = 0x99; return 1; - case 0x0459: if (bytes && length >= 1) *bytes = 0x9a; return 1; - case 0x203a: if (bytes && length >= 1) *bytes = 0x9b; return 1; - case 0x045a: if (bytes && length >= 1) *bytes = 0x9c; return 1; - case 0x045c: if (bytes && length >= 1) *bytes = 0x9d; return 1; - case 0x045b: if (bytes && length >= 1) *bytes = 0x9e; return 1; - case 0x045f: if (bytes && length >= 1) *bytes = 0x9f; return 1; - case 0x040e: if (bytes && length >= 1) *bytes = 0xa1; return 1; - case 0x045e: if (bytes && length >= 1) *bytes = 0xa2; return 1; - case 0x0408: if (bytes && length >= 1) *bytes = 0xa3; return 1; - case 0x0490: if (bytes && length >= 1) *bytes = 0xa5; return 1; - case 0x0401: if (bytes && length >= 1) *bytes = 0xa8; return 1; - case 0x0404: if (bytes && length >= 1) *bytes = 0xaa; return 1; - case 0x0407: if (bytes && length >= 1) *bytes = 0xaf; return 1; - case 0x0406: if (bytes && length >= 1) *bytes = 0xb2; return 1; - case 0x0456: if (bytes && length >= 1) *bytes = 0xb3; return 1; - case 0x0491: if (bytes && length >= 1) *bytes = 0xb4; return 1; - case 0x0451: if (bytes && length >= 1) *bytes = 0xb8; return 1; - case 0x2116: if (bytes && length >= 1) *bytes = 0xb9; return 1; - case 0x0454: if (bytes && length >= 1) *bytes = 0xba; return 1; - case 0x0458: if (bytes && length >= 1) *bytes = 0xbc; return 1; - case 0x0405: if (bytes && length >= 1) *bytes = 0xbd; return 1; - case 0x0455: if (bytes && length >= 1) *bytes = 0xbe; return 1; - case 0x0457: if (bytes && length >= 1) *bytes = 0xbf; return 1; - case 0x0410: if (bytes && length >= 1) *bytes = 0xc0; return 1; - case 0x0411: if (bytes && length >= 1) *bytes = 0xc1; return 1; - case 0x0412: if (bytes && length >= 1) *bytes = 0xc2; return 1; - case 0x0413: if (bytes && length >= 1) *bytes = 0xc3; return 1; - case 0x0414: if (bytes && length >= 1) *bytes = 0xc4; return 1; - case 0x0415: if (bytes && length >= 1) *bytes = 0xc5; return 
1; - case 0x0416: if (bytes && length >= 1) *bytes = 0xc6; return 1; - case 0x0417: if (bytes && length >= 1) *bytes = 0xc7; return 1; - case 0x0418: if (bytes && length >= 1) *bytes = 0xc8; return 1; - case 0x0419: if (bytes && length >= 1) *bytes = 0xc9; return 1; - case 0x041a: if (bytes && length >= 1) *bytes = 0xca; return 1; - case 0x041b: if (bytes && length >= 1) *bytes = 0xcb; return 1; - case 0x041c: if (bytes && length >= 1) *bytes = 0xcc; return 1; - case 0x041d: if (bytes && length >= 1) *bytes = 0xcd; return 1; - case 0x041e: if (bytes && length >= 1) *bytes = 0xce; return 1; - case 0x041f: if (bytes && length >= 1) *bytes = 0xcf; return 1; - case 0x0420: if (bytes && length >= 1) *bytes = 0xd0; return 1; - case 0x0421: if (bytes && length >= 1) *bytes = 0xd1; return 1; - case 0x0422: if (bytes && length >= 1) *bytes = 0xd2; return 1; - case 0x0423: if (bytes && length >= 1) *bytes = 0xd3; return 1; - case 0x0424: if (bytes && length >= 1) *bytes = 0xd4; return 1; - case 0x0425: if (bytes && length >= 1) *bytes = 0xd5; return 1; - case 0x0426: if (bytes && length >= 1) *bytes = 0xd6; return 1; - case 0x0427: if (bytes && length >= 1) *bytes = 0xd7; return 1; - case 0x0428: if (bytes && length >= 1) *bytes = 0xd8; return 1; - case 0x0429: if (bytes && length >= 1) *bytes = 0xd9; return 1; - case 0x042a: if (bytes && length >= 1) *bytes = 0xda; return 1; - case 0x042b: if (bytes && length >= 1) *bytes = 0xdb; return 1; - case 0x042c: if (bytes && length >= 1) *bytes = 0xdc; return 1; - case 0x042d: if (bytes && length >= 1) *bytes = 0xdd; return 1; - case 0x042e: if (bytes && length >= 1) *bytes = 0xde; return 1; - case 0x042f: if (bytes && length >= 1) *bytes = 0xdf; return 1; - case 0x0430: if (bytes && length >= 1) *bytes = 0xe0; return 1; - case 0x0431: if (bytes && length >= 1) *bytes = 0xe1; return 1; - case 0x0432: if (bytes && length >= 1) *bytes = 0xe2; return 1; - case 0x0433: if (bytes && length >= 1) *bytes = 0xe3; return 1; - case 0x0434: if (bytes && length >= 1) *bytes = 0xe4; return 1; - case 0x0435: if (bytes && length >= 1) *bytes = 0xe5; return 1; - case 0x0436: if (bytes && length >= 1) *bytes = 0xe6; return 1; - case 0x0437: if (bytes && length >= 1) *bytes = 0xe7; return 1; - case 0x0438: if (bytes && length >= 1) *bytes = 0xe8; return 1; - case 0x0439: if (bytes && length >= 1) *bytes = 0xe9; return 1; - case 0x043a: if (bytes && length >= 1) *bytes = 0xea; return 1; - case 0x043b: if (bytes && length >= 1) *bytes = 0xeb; return 1; - case 0x043c: if (bytes && length >= 1) *bytes = 0xec; return 1; - case 0x043d: if (bytes && length >= 1) *bytes = 0xed; return 1; - case 0x043e: if (bytes && length >= 1) *bytes = 0xee; return 1; - case 0x043f: if (bytes && length >= 1) *bytes = 0xef; return 1; - case 0x0440: if (bytes && length >= 1) *bytes = 0xf0; return 1; - case 0x0441: if (bytes && length >= 1) *bytes = 0xf1; return 1; - case 0x0442: if (bytes && length >= 1) *bytes = 0xf2; return 1; - case 0x0443: if (bytes && length >= 1) *bytes = 0xf3; return 1; - case 0x0444: if (bytes && length >= 1) *bytes = 0xf4; return 1; - case 0x0445: if (bytes && length >= 1) *bytes = 0xf5; return 1; - case 0x0446: if (bytes && length >= 1) *bytes = 0xf6; return 1; - case 0x0447: if (bytes && length >= 1) *bytes = 0xf7; return 1; - case 0x0448: if (bytes && length >= 1) *bytes = 0xf8; return 1; - case 0x0449: if (bytes && length >= 1) *bytes = 0xf9; return 1; - case 0x044a: if (bytes && length >= 1) *bytes = 0xfa; return 1; - case 0x044b: if (bytes && length >= 1) *bytes = 
0xfb; return 1; - case 0x044c: if (bytes && length >= 1) *bytes = 0xfc; return 1; - case 0x044d: if (bytes && length >= 1) *bytes = 0xfd; return 1; - case 0x044e: if (bytes && length >= 1) *bytes = 0xfe; return 1; - case 0x044f: if (bytes && length >= 1) *bytes = 0xff; return 1; - default: return 0; - } -} - - -int Windows1251Encoding::queryConvert(const unsigned char* bytes, int length) const -{ - if (1 <= length) - return _charMap[*bytes]; - else - return -1; -} - - -int Windows1251Encoding::sequenceLength(const unsigned char* bytes, int length) const -{ - return 1; -} - - -} // namespace Poco - diff --git a/base/poco/Foundation/src/Windows1252Encoding.cpp b/base/poco/Foundation/src/Windows1252Encoding.cpp deleted file mode 100644 index b73a19dd90d..00000000000 --- a/base/poco/Foundation/src/Windows1252Encoding.cpp +++ /dev/null @@ -1,151 +0,0 @@ -// -// Windows1252Encoding.cpp -// -// Library: Foundation -// Package: Text -// Module: Windows1252Encoding -// -// Copyright (c) 2005-2007, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Windows1252Encoding.h" -#include "Poco/String.h" -#include - - -namespace Poco { - - -const char* Windows1252Encoding::_names[] = -{ - "windows-1252", - "Windows-1252", - "cp1252", - "CP1252", - NULL -}; - - -const TextEncoding::CharacterMap Windows1252Encoding::_charMap = -{ - /* 00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f */ - /* 00 */ 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, - /* 10 */ 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, - /* 20 */ 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, - /* 30 */ 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, - /* 40 */ 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, - /* 50 */ 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, - /* 60 */ 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, - /* 70 */ 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, - /* 80 */ 0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, 0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f, - /* 90 */ 0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, 0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178, - /* a0 */ 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, - /* b0 */ 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, - /* c0 */ 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, - /* d0 */ 0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x00d7, 0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x00df, - /* e0 */ 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, 
0x00ef, - /* f0 */ 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x00f7, 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff, -}; - - -Windows1252Encoding::Windows1252Encoding() -{ -} - - -Windows1252Encoding::~Windows1252Encoding() -{ -} - - -const char* Windows1252Encoding::canonicalName() const -{ - return _names[0]; -} - - -bool Windows1252Encoding::isA(const std::string& encodingName) const -{ - for (const char** name = _names; *name; ++name) - { - if (Poco::icompare(encodingName, *name) == 0) - return true; - } - return false; -} - - -const TextEncoding::CharacterMap& Windows1252Encoding::characterMap() const -{ - return _charMap; -} - - -int Windows1252Encoding::convert(const unsigned char* bytes) const -{ - return _charMap[*bytes]; -} - - -int Windows1252Encoding::convert(int ch, unsigned char* bytes, int length) const -{ - if (ch >= 0 && ch <= 255 && _charMap[ch] == ch) - { - if (bytes && length >= 1) - *bytes = ch; - return 1; - } - else switch (ch) - { - case 0x20ac: if (bytes && length >= 1) *bytes = 0x80; return 1; - case 0x201a: if (bytes && length >= 1) *bytes = 0x82; return 1; - case 0x0192: if (bytes && length >= 1) *bytes = 0x83; return 1; - case 0x201e: if (bytes && length >= 1) *bytes = 0x84; return 1; - case 0x2026: if (bytes && length >= 1) *bytes = 0x85; return 1; - case 0x2020: if (bytes && length >= 1) *bytes = 0x86; return 1; - case 0x2021: if (bytes && length >= 1) *bytes = 0x87; return 1; - case 0x02c6: if (bytes && length >= 1) *bytes = 0x88; return 1; - case 0x2030: if (bytes && length >= 1) *bytes = 0x89; return 1; - case 0x0160: if (bytes && length >= 1) *bytes = 0x8a; return 1; - case 0x2039: if (bytes && length >= 1) *bytes = 0x8b; return 1; - case 0x0152: if (bytes && length >= 1) *bytes = 0x8c; return 1; - case 0x017d: if (bytes && length >= 1) *bytes = 0x8e; return 1; - case 0x2018: if (bytes && length >= 1) *bytes = 0x91; return 1; - case 0x2019: if (bytes && length >= 1) *bytes = 0x92; return 1; - case 0x201c: if (bytes && length >= 1) *bytes = 0x93; return 1; - case 0x201d: if (bytes && length >= 1) *bytes = 0x94; return 1; - case 0x2022: if (bytes && length >= 1) *bytes = 0x95; return 1; - case 0x2013: if (bytes && length >= 1) *bytes = 0x96; return 1; - case 0x2014: if (bytes && length >= 1) *bytes = 0x97; return 1; - case 0x02dc: if (bytes && length >= 1) *bytes = 0x98; return 1; - case 0x2122: if (bytes && length >= 1) *bytes = 0x99; return 1; - case 0x0161: if (bytes && length >= 1) *bytes = 0x9a; return 1; - case 0x203a: if (bytes && length >= 1) *bytes = 0x9b; return 1; - case 0x0153: if (bytes && length >= 1) *bytes = 0x9c; return 1; - case 0x017e: if (bytes && length >= 1) *bytes = 0x9e; return 1; - case 0x0178: if (bytes && length >= 1) *bytes = 0x9f; return 1; - default: return 0; - } -} - - -int Windows1252Encoding::queryConvert(const unsigned char* bytes, int length) const -{ - if (1 <= length) - return _charMap[*bytes]; - else - return -1; -} - - -int Windows1252Encoding::sequenceLength(const unsigned char* bytes, int length) const -{ - return 1; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/WindowsConsoleChannel.cpp b/base/poco/Foundation/src/WindowsConsoleChannel.cpp deleted file mode 100644 index 48665bb178a..00000000000 --- a/base/poco/Foundation/src/WindowsConsoleChannel.cpp +++ /dev/null @@ -1,269 +0,0 @@ -// -// WindowsConsoleChannel.cpp -// -// Library: Foundation -// Package: Logging -// Module: WindowsConsoleChannel -// -// Copyright (c) 2007, Applied Informatics Software Engineering 
GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/WindowsConsoleChannel.h" -#include "Poco/Message.h" -#include "Poco/String.h" -#include "Poco/Exception.h" - - -namespace Poco { - - -WindowsConsoleChannel::WindowsConsoleChannel(): - _isFile(false), - _hConsole(INVALID_HANDLE_VALUE) -{ - _hConsole = GetStdHandle(STD_OUTPUT_HANDLE); - // check whether the console has been redirected - DWORD mode; - _isFile = (GetConsoleMode(_hConsole, &mode) == 0); -} - - -WindowsConsoleChannel::~WindowsConsoleChannel() -{ -} - - -void WindowsConsoleChannel::log(const Message& msg) -{ - std::string text = msg.getText(); - text += "\r\n"; - - DWORD written; - WriteFile(_hConsole, text.data(), text.size(), &written, NULL); -} - - -WindowsColorConsoleChannel::WindowsColorConsoleChannel(): - _enableColors(true), - _isFile(false), - _hConsole(INVALID_HANDLE_VALUE) -{ - _hConsole = GetStdHandle(STD_OUTPUT_HANDLE); - // check whether the console has been redirected - DWORD mode; - _isFile = (GetConsoleMode(_hConsole, &mode) == 0); - initColors(); -} - - -WindowsColorConsoleChannel::~WindowsColorConsoleChannel() -{ -} - - -void WindowsColorConsoleChannel::log(const Message& msg) -{ - std::string text = msg.getText(); - text += "\r\n"; - - if (_enableColors && !_isFile) - { - WORD attr = _colors[0]; - attr &= 0xFFF0; - attr |= _colors[msg.getPriority()]; - SetConsoleTextAttribute(_hConsole, attr); - } - - DWORD written; - WriteFile(_hConsole, text.data(), text.size(), &written, NULL); - - if (_enableColors && !_isFile) - { - SetConsoleTextAttribute(_hConsole, _colors[0]); - } -} - - -void WindowsColorConsoleChannel::setProperty(const std::string& name, const std::string& value) -{ - if (name == "enableColors") - { - _enableColors = icompare(value, "true") == 0; - } - else if (name == "traceColor") - { - _colors[Message::PRIO_TRACE] = parseColor(value); - } - else if (name == "debugColor") - { - _colors[Message::PRIO_DEBUG] = parseColor(value); - } - else if (name == "informationColor") - { - _colors[Message::PRIO_INFORMATION] = parseColor(value); - } - else if (name == "noticeColor") - { - _colors[Message::PRIO_NOTICE] = parseColor(value); - } - else if (name == "warningColor") - { - _colors[Message::PRIO_WARNING] = parseColor(value); - } - else if (name == "errorColor") - { - _colors[Message::PRIO_ERROR] = parseColor(value); - } - else if (name == "criticalColor") - { - _colors[Message::PRIO_CRITICAL] = parseColor(value); - } - else if (name == "fatalColor") - { - _colors[Message::PRIO_FATAL] = parseColor(value); - } - else - { - Channel::setProperty(name, value); - } -} - - -std::string WindowsColorConsoleChannel::getProperty(const std::string& name) const -{ - if (name == "enableColors") - { - return _enableColors ? 
"true" : "false"; - } - else if (name == "traceColor") - { - return formatColor(_colors[Message::PRIO_TRACE]); - } - else if (name == "debugColor") - { - return formatColor(_colors[Message::PRIO_DEBUG]); - } - else if (name == "informationColor") - { - return formatColor(_colors[Message::PRIO_INFORMATION]); - } - else if (name == "noticeColor") - { - return formatColor(_colors[Message::PRIO_NOTICE]); - } - else if (name == "warningColor") - { - return formatColor(_colors[Message::PRIO_WARNING]); - } - else if (name == "errorColor") - { - return formatColor(_colors[Message::PRIO_ERROR]); - } - else if (name == "criticalColor") - { - return formatColor(_colors[Message::PRIO_CRITICAL]); - } - else if (name == "fatalColor") - { - return formatColor(_colors[Message::PRIO_FATAL]); - } - else - { - return Channel::getProperty(name); - } -} - - -WORD WindowsColorConsoleChannel::parseColor(const std::string& color) const -{ - if (icompare(color, "default") == 0) - return _colors[0]; - else if (icompare(color, "black") == 0) - return CC_BLACK; - else if (icompare(color, "red") == 0) - return CC_RED; - else if (icompare(color, "green") == 0) - return CC_GREEN; - else if (icompare(color, "brown") == 0) - return CC_BROWN; - else if (icompare(color, "blue") == 0) - return CC_BLUE; - else if (icompare(color, "magenta") == 0) - return CC_MAGENTA; - else if (icompare(color, "cyan") == 0) - return CC_CYAN; - else if (icompare(color, "gray") == 0) - return CC_GRAY; - else if (icompare(color, "darkGray") == 0) - return CC_DARKGRAY; - else if (icompare(color, "lightRed") == 0) - return CC_LIGHTRED; - else if (icompare(color, "lightGreen") == 0) - return CC_LIGHTGREEN; - else if (icompare(color, "yellow") == 0) - return CC_YELLOW; - else if (icompare(color, "lightBlue") == 0) - return CC_LIGHTBLUE; - else if (icompare(color, "lightMagenta") == 0) - return CC_LIGHTMAGENTA; - else if (icompare(color, "lightCyan") == 0) - return CC_LIGHTCYAN; - else if (icompare(color, "white") == 0) - return CC_WHITE; - else throw InvalidArgumentException("Invalid color value", color); -} - - -std::string WindowsColorConsoleChannel::formatColor(WORD color) const -{ - switch (color) - { - case CC_BLACK: return "black"; - case CC_RED: return "red"; - case CC_GREEN: return "green"; - case CC_BROWN: return "brown"; - case CC_BLUE: return "blue"; - case CC_MAGENTA: return "magenta"; - case CC_CYAN: return "cyan"; - case CC_GRAY: return "gray"; - case CC_DARKGRAY: return "darkGray"; - case CC_LIGHTRED: return "lightRed"; - case CC_LIGHTGREEN: return "lightGreen"; - case CC_YELLOW: return "yellow"; - case CC_LIGHTBLUE: return "lightBlue"; - case CC_LIGHTMAGENTA: return "lightMagenta"; - case CC_LIGHTCYAN: return "lightCyan"; - case CC_WHITE: return "white"; - default: return "invalid"; - } -} - - -void WindowsColorConsoleChannel::initColors() -{ - if (!_isFile) - { - CONSOLE_SCREEN_BUFFER_INFO csbi; - GetConsoleScreenBufferInfo(_hConsole, &csbi); - _colors[0] = csbi.wAttributes; - } - else - { - _colors[0] = CC_WHITE; - } - _colors[Message::PRIO_FATAL] = CC_LIGHTRED; - _colors[Message::PRIO_CRITICAL] = CC_LIGHTRED; - _colors[Message::PRIO_ERROR] = CC_LIGHTRED; - _colors[Message::PRIO_WARNING] = CC_YELLOW; - _colors[Message::PRIO_NOTICE] = _colors[0]; - _colors[Message::PRIO_INFORMATION] = _colors[0]; - _colors[Message::PRIO_DEBUG] = CC_GRAY; - _colors[Message::PRIO_TRACE] = CC_GRAY; -} - - -} // namespace Poco diff --git a/base/poco/Foundation/src/adler32.c b/base/poco/Foundation/src/adler32.c deleted file mode 100644 index 
8fa49192ada..00000000000 --- a/base/poco/Foundation/src/adler32.c +++ /dev/null @@ -1,188 +0,0 @@ -/* adler32.c -- compute the Adler-32 checksum of a data stream - * Copyright (C) 1995-2011, 2016 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* @(#) $Id$ */ - -#include "zutil.h" - -#define local static - -local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2)); - -#define BASE 65521U /* largest prime smaller than 65536 */ -#define NMAX 5552 -/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ - -#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;} -#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); -#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); -#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); -#define DO16(buf) DO8(buf,0); DO8(buf,8); - -/* use NO_DIVIDE if your processor does not do division in hardware -- - try it both ways to see which is faster */ -#ifdef NO_DIVIDE -/* note that this assumes BASE is 65521, where 65536 % 65521 == 15 - (thank you to John Reiser for pointing this out) */ -# define CHOP(a) \ - do { \ - unsigned long tmp = a >> 16; \ - a &= 0xffffUL; \ - a += (tmp << 4) - tmp; \ - } while (0) -# define MOD28(a) \ - do { \ - CHOP(a); \ - if (a >= BASE) a -= BASE; \ - } while (0) -# define MOD(a) \ - do { \ - CHOP(a); \ - MOD28(a); \ - } while (0) -# define MOD63(a) \ - do { /* this assumes a is not negative */ \ - z_off64_t tmp = a >> 32; \ - a &= 0xffffffffL; \ - a += (tmp << 8) - (tmp << 5) + tmp; \ - tmp = a >> 16; \ - a &= 0xffffL; \ - a += (tmp << 4) - tmp; \ - tmp = a >> 16; \ - a &= 0xffffL; \ - a += (tmp << 4) - tmp; \ - if (a >= BASE) a -= BASE; \ - } while (0) -#else -# define MOD(a) a %= BASE -# define MOD28(a) a %= BASE -# define MOD63(a) a %= BASE -#endif - -/* ========================================================================= */ -uLong ZEXPORT adler32_z(adler, buf, len) - uLong adler; - const Bytef *buf; - z_size_t len; -{ - unsigned long sum2; - unsigned n; - - /* split Adler-32 into component sums */ - sum2 = (adler >> 16) & 0xffff; - adler &= 0xffff; - - /* in case user likes doing a byte at a time, keep it fast */ - if (len == 1) { - adler += buf[0]; - if (adler >= BASE) - adler -= BASE; - sum2 += adler; - if (sum2 >= BASE) - sum2 -= BASE; - return adler | (sum2 << 16); - } - - /* initial Adler-32 value (deferred check for len == 1 speed) */ - if (buf == Z_NULL) - return 1L; - - /* in case short lengths are provided, keep it somewhat fast */ - if (len < 16) { - while (len--) { - adler += *buf++; - sum2 += adler; - } - if (adler >= BASE) - adler -= BASE; - MOD28(sum2); /* only added so many BASE's */ - return adler | (sum2 << 16); - } - - /* do length NMAX blocks -- requires just one modulo operation */ - while (len >= NMAX) { - len -= NMAX; - n = NMAX / 16; /* NMAX is divisible by 16 */ - do { - DO16(buf); /* 16 sums unrolled */ - buf += 16; - } while (--n); - MOD(adler); - MOD(sum2); - } - - /* do remaining bytes (less than NMAX, still just one modulo) */ - if (len) { /* avoid modulos if none remaining */ - while (len >= 16) { - len -= 16; - DO16(buf); - buf += 16; - } - while (len--) { - adler += *buf++; - sum2 += adler; - } - MOD(adler); - MOD(sum2); - } - - /* return recombined sums */ - return adler | (sum2 << 16); -} - -/* ========================================================================= */ -uLong ZEXPORT adler32(adler, buf, len) - uLong adler; - const Bytef *buf; - uInt len; -{ - return adler32_z(adler, buf, len); -} - -/* 
========================================================================= */ -local uLong adler32_combine_(adler1, adler2, len2) - uLong adler1; - uLong adler2; - z_off64_t len2; -{ - unsigned long sum1; - unsigned long sum2; - unsigned rem; - - /* for negative len, return invalid adler32 as a clue for debugging */ - if (len2 < 0) - return 0xffffffffUL; - - /* the derivation of this formula is left as an exercise for the reader */ - MOD63(len2); /* assumes len2 >= 0 */ - rem = (unsigned)len2; - sum1 = adler1 & 0xffff; - sum2 = rem * sum1; - MOD(sum2); - sum1 += (adler2 & 0xffff) + BASE - 1; - sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; - if (sum1 >= BASE) sum1 -= BASE; - if (sum1 >= BASE) sum1 -= BASE; - if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1); - if (sum2 >= BASE) sum2 -= BASE; - return sum1 | (sum2 << 16); -} - -/* ========================================================================= */ -uLong ZEXPORT adler32_combine(adler1, adler2, len2) - uLong adler1; - uLong adler2; - z_off_t len2; -{ - return adler32_combine_(adler1, adler2, len2); -} - -uLong ZEXPORT adler32_combine64(adler1, adler2, len2) - uLong adler1; - uLong adler2; - z_off64_t len2; -{ - return adler32_combine_(adler1, adler2, len2); -} diff --git a/base/poco/Foundation/src/bignum-dtoa.cc b/base/poco/Foundation/src/bignum-dtoa.cc deleted file mode 100644 index 5a44adfccf3..00000000000 --- a/base/poco/Foundation/src/bignum-dtoa.cc +++ /dev/null @@ -1,641 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include - -#include "bignum-dtoa.h" - -#include "bignum.h" -#include "ieee.h" - -namespace double_conversion { - -static int NormalizedExponent(uint64_t significand, int exponent) { - ASSERT(significand != 0); - while ((significand & Double::kHiddenBit) == 0) { - significand = significand << 1; - exponent = exponent - 1; - } - return exponent; -} - - -// Forward declarations: -// Returns an estimation of k such that 10^(k-1) <= v < 10^k. 
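// The unrolled zlib code above computes the textbook Adler-32 recurrence:
// a = 1 + sum of all bytes (mod 65521) and b = sum of the running a values
// (mod 65521); DO16 merely batches sixteen steps between deferred modulo
// reductions, and NMAX bounds how long the sums may grow without overflowing
// 32 bits. An unoptimized reference version for comparison (adler32_simple is
// a hypothetical name):
#include <cstddef>
#include <cstdint>

std::uint32_t adler32_simple(const unsigned char* buf, std::size_t len)
{
    const std::uint32_t BASE = 65521;    // largest prime below 2^16
    std::uint32_t a = 1, b = 0;
    for (std::size_t i = 0; i < len; ++i)
    {
        a = (a + buf[i]) % BASE;
        b = (b + a) % BASE;
    }
    return (b << 16) | a;                // high half: b, low half: a
}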
-static int EstimatePower(int exponent); -// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator -// and denominator. -static void InitialScaledStartValues(uint64_t significand, - int exponent, - bool lower_boundary_is_closer, - int estimated_power, - bool need_boundary_deltas, - Bignum* numerator, - Bignum* denominator, - Bignum* delta_minus, - Bignum* delta_plus); -// Multiplies numerator/denominator so that its values lies in the range 1-10. -// Returns decimal_point s.t. -// v = numerator'/denominator' * 10^(decimal_point-1) -// where numerator' and denominator' are the values of numerator and -// denominator after the call to this function. -static void FixupMultiply10(int estimated_power, bool is_even, - int* decimal_point, - Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus); -// Generates digits from the left to the right and stops when the generated -// digits yield the shortest decimal representation of v. -static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus, - bool is_even, - Vector<char> buffer, int* length); -// Generates 'requested_digits' after the decimal point. -static void BignumToFixed(int requested_digits, int* decimal_point, - Bignum* numerator, Bignum* denominator, - Vector<char> buffer, int* length); -// Generates 'count' digits of numerator/denominator. -// Once 'count' digits have been produced rounds the result depending on the -// remainder (remainders of exactly .5 round upwards). Might update the -// decimal_point when rounding up (for example for 0.9999). -static void GenerateCountedDigits(int count, int* decimal_point, - Bignum* numerator, Bignum* denominator, - Vector<char> buffer, int* length); - - -void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits, - Vector<char> buffer, int* length, int* decimal_point) { - ASSERT(v > 0); - ASSERT(!Double(v).IsSpecial()); - uint64_t significand; - int exponent; - bool lower_boundary_is_closer; - if (mode == BIGNUM_DTOA_SHORTEST_SINGLE) { - float f = static_cast<float>(v); - ASSERT(f == v); - significand = Single(f).Significand(); - exponent = Single(f).Exponent(); - lower_boundary_is_closer = Single(f).LowerBoundaryIsCloser(); - } else { - significand = Double(v).Significand(); - exponent = Double(v).Exponent(); - lower_boundary_is_closer = Double(v).LowerBoundaryIsCloser(); - } - bool need_boundary_deltas = - (mode == BIGNUM_DTOA_SHORTEST || mode == BIGNUM_DTOA_SHORTEST_SINGLE); - - bool is_even = (significand & 1) == 0; - int normalized_exponent = NormalizedExponent(significand, exponent); - // estimated_power might be too low by 1. - int estimated_power = EstimatePower(normalized_exponent); - - // Shortcut for Fixed. - // The requested digits correspond to the digits after the point. If the - // number is much too small, then there is no need in trying to get any - // digits. - if (mode == BIGNUM_DTOA_FIXED && -estimated_power - 1 > requested_digits) { - buffer[0] = '\0'; - *length = 0; - // Set decimal-point to -requested_digits. This is what Gay does. - // Note that it should not have any effect anyways since the string is - // empty. - *decimal_point = -requested_digits; - return; - } - - Bignum numerator; - Bignum denominator; - Bignum delta_minus; - Bignum delta_plus; - // Make sure the bignum can grow large enough. The smallest double equals - // 4e-324. In this case the denominator needs fewer than 324*4 binary digits.
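// GenerateShortestDigits targets the same guarantee that C++17 std::to_chars
// gives for floating point: emit the fewest decimal digits that still parse
// back to the identical double. A quick way to observe that behaviour,
// assuming a standard library with floating-point to_chars support:
#include <charconv>
#include <cstdio>

void printShortest(double v)
{
    char buf[64];
    auto result = std::to_chars(buf, buf + sizeof buf, v);  // shortest round-trip form
    if (result.ec == std::errc())
        std::printf("%.*s\n", static_cast<int>(result.ptr - buf), buf);
}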
- // The maximum double is 1.7976931348623157e308 which needs fewer than - // 308*4 binary digits. - ASSERT(Bignum::kMaxSignificantBits >= 324*4); - InitialScaledStartValues(significand, exponent, lower_boundary_is_closer, - estimated_power, need_boundary_deltas, - &numerator, &denominator, - &delta_minus, &delta_plus); - // We now have v = (numerator / denominator) * 10^estimated_power. - FixupMultiply10(estimated_power, is_even, decimal_point, - &numerator, &denominator, - &delta_minus, &delta_plus); - // We now have v = (numerator / denominator) * 10^(decimal_point-1), and - // 1 <= (numerator + delta_plus) / denominator < 10 - switch (mode) { - case BIGNUM_DTOA_SHORTEST: - case BIGNUM_DTOA_SHORTEST_SINGLE: - GenerateShortestDigits(&numerator, &denominator, - &delta_minus, &delta_plus, - is_even, buffer, length); - break; - case BIGNUM_DTOA_FIXED: - BignumToFixed(requested_digits, decimal_point, - &numerator, &denominator, - buffer, length); - break; - case BIGNUM_DTOA_PRECISION: - GenerateCountedDigits(requested_digits, decimal_point, - &numerator, &denominator, - buffer, length); - break; - default: - UNREACHABLE(); - } - buffer[*length] = '\0'; -} - - -// The procedure starts generating digits from the left to the right and stops -// when the generated digits yield the shortest decimal representation of v. A -// decimal representation of v is a number lying closer to v than to any other -// double, so it converts to v when read. -// -// This is true if d, the decimal representation, is between m- and m+, the -// upper and lower boundaries. d must be strictly between them if !is_even. -// m- := (numerator - delta_minus) / denominator -// m+ := (numerator + delta_plus) / denominator -// -// Precondition: 0 <= (numerator+delta_plus) / denominator < 10. -// If 1 <= (numerator+delta_plus) / denominator < 10 then no leading 0 digit -// will be produced. This should be the standard precondition. -static void GenerateShortestDigits(Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus, - bool is_even, - Vector<char> buffer, int* length) { - // Small optimization: if delta_minus and delta_plus are the same just reuse - // one of the two bignums. - if (Bignum::Equal(*delta_minus, *delta_plus)) { - delta_plus = delta_minus; - } - *length = 0; - for (;;) { - uint16_t digit; - digit = numerator->DivideModuloIntBignum(*denominator); - ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. - // digit = numerator / denominator (integer division). - // numerator = numerator % denominator. - buffer[(*length)++] = static_cast<char>(digit + '0'); - - // Can we stop already? - // If the remainder of the division is less than the distance to the lower - // boundary we can stop. In this case we simply round down (discarding the - // remainder). - // Similarly we test if we can round up (using the upper boundary). - bool in_delta_room_minus; - bool in_delta_room_plus; - if (is_even) { - in_delta_room_minus = Bignum::LessEqual(*numerator, *delta_minus); - } else { - in_delta_room_minus = Bignum::Less(*numerator, *delta_minus); - } - if (is_even) { - in_delta_room_plus = - Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0; - } else { - in_delta_room_plus = - Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0; - } - if (!in_delta_room_minus && !in_delta_room_plus) { - // Prepare for next iteration. - numerator->Times10(); - delta_minus->Times10(); - // We optimized delta_plus to be equal to delta_minus (if they share the - // same value).
So don't multiply delta_plus if they point to the same - // object. - if (delta_minus != delta_plus) { - delta_plus->Times10(); - } - } else if (in_delta_room_minus && in_delta_room_plus) { - // Let's see if 2*numerator < denominator. - // If yes, then the next digit would be < 5 and we can round down. - int compare = Bignum::PlusCompare(*numerator, *numerator, *denominator); - if (compare < 0) { - // Remaining digits are less than .5. -> Round down (== do nothing). - } else if (compare > 0) { - // Remaining digits are more than .5 of denominator. -> Round up. - // Note that the last digit could not be a '9' as otherwise the whole - // loop would have stopped earlier. - // We still have an assert here in case the preconditions were not - // satisfied. - ASSERT(buffer[(*length) - 1] != '9'); - buffer[(*length) - 1]++; - } else { - // Halfway case. - // TODO(floitsch): need a way to solve half-way cases. - // For now let's round towards even (since this is what Gay seems to - // do). - - if ((buffer[(*length) - 1] - '0') % 2 == 0) { - // Round down => Do nothing. - } else { - ASSERT(buffer[(*length) - 1] != '9'); - buffer[(*length) - 1]++; - } - } - return; - } else if (in_delta_room_minus) { - // Round down (== do nothing). - return; - } else { // in_delta_room_plus - // Round up. - // Note again that the last digit could not be '9' since this would have - // stopped the loop earlier. - // We still have an ASSERT here, in case the preconditions were not - // satisfied. - ASSERT(buffer[(*length) -1] != '9'); - buffer[(*length) - 1]++; - return; - } - } -} - - -// Let v = numerator / denominator < 10. -// Then we generate 'count' digits of d = x.xxxxx... (without the decimal point) -// from left to right. Once 'count' digits have been produced we decide whether -// to round up or down. Remainders of exactly .5 round upwards. Numbers such -// as 9.999999 propagate a carry all the way, and change the -// exponent (decimal_point), when rounding upwards. -static void GenerateCountedDigits(int count, int* decimal_point, - Bignum* numerator, Bignum* denominator, - Vector<char> buffer, int* length) { - ASSERT(count >= 0); - for (int i = 0; i < count - 1; ++i) { - uint16_t digit; - digit = numerator->DivideModuloIntBignum(*denominator); - ASSERT(digit <= 9); // digit is a uint16_t and therefore always positive. - // digit = numerator / denominator (integer division). - // numerator = numerator % denominator. - buffer[i] = static_cast<char>(digit + '0'); - // Prepare for next iteration. - numerator->Times10(); - } - // Generate the last digit. - uint16_t digit; - digit = numerator->DivideModuloIntBignum(*denominator); - if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) { - digit++; - } - ASSERT(digit <= 10); - buffer[count - 1] = static_cast<char>(digit + '0'); - // Correct bad digits (in case we had a sequence of '9's). Propagate the - // carry until we hit a non-'9' or till we reach the first digit. - for (int i = count - 1; i > 0; --i) { - if (buffer[i] != '0' + 10) break; - buffer[i] = '0'; - buffer[i - 1]++; - } - if (buffer[0] == '0' + 10) { - // Propagate a carry past the top place. - buffer[0] = '1'; - (*decimal_point)++; - } - *length = count; -} - - -// Generates 'requested_digits' after the decimal point. It might omit -// trailing '0's. If the input number is too small then no digits at all are -// generated (ex.: 2 fixed digits for 0.00001). -// -// Input verifies: 1 <= (numerator + delta) / denominator < 10.
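// Both digit generators above peel off one decimal digit per iteration:
// digit = numerator / denominator via DivideModuloIntBignum(), the remainder
// stays in numerator, and Times10() shifts the next digit into range. The
// same loop with machine integers, for values that fit in 64 bits
// (fractionDigits is a hypothetical helper; fractionDigits(1, 3, 5) yields
// "33333"):
#include <cstdint>
#include <string>

std::string fractionDigits(std::uint64_t num, std::uint64_t den, int count)
{
    // Precondition: num < den (a pure fraction), den != 0, and num small
    // enough that num * 10 cannot overflow.
    std::string digits;
    for (int i = 0; i < count; ++i)
    {
        num *= 10;                                     // Times10()
        digits += static_cast<char>('0' + num / den);  // DivideModuloIntBignum()
        num %= den;                                    // keep the remainder
    }
    return digits;
}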
-static void BignumToFixed(int requested_digits, int* decimal_point, - Bignum* numerator, Bignum* denominator, - Vector<char> buffer, int* length) { - // Note that we have to look at more than just the requested_digits, since - // a number could be rounded up. Example: v=0.5 with requested_digits=0. - // Even though the power of v equals 0 we can't just stop here. - if (-(*decimal_point) > requested_digits) { - // The number is definitively too small. - // Ex: 0.001 with requested_digits == 1. - // Set decimal-point to -requested_digits. This is what Gay does. - // Note that it should not have any effect anyways since the string is - // empty. - *decimal_point = -requested_digits; - *length = 0; - return; - } else if (-(*decimal_point) == requested_digits) { - // We only need to verify if the number rounds down or up. - // Ex: 0.04 and 0.06 with requested_digits == 1. - ASSERT(*decimal_point == -requested_digits); - // Initially the fraction lies in range (1, 10]. Multiply the denominator - // by 10 so that we can compare more easily. - denominator->Times10(); - if (Bignum::PlusCompare(*numerator, *numerator, *denominator) >= 0) { - // If the fraction is >= 0.5 then we have to include the rounded - // digit. - buffer[0] = '1'; - *length = 1; - (*decimal_point)++; - } else { - // Note that we caught most of similar cases earlier. - *length = 0; - } - return; - } else { - // The requested digits correspond to the digits after the point. - // The variable 'needed_digits' includes the digits before the point. - int needed_digits = (*decimal_point) + requested_digits; - GenerateCountedDigits(needed_digits, decimal_point, - numerator, denominator, - buffer, length); - } -} - - -// Returns an estimation of k such that 10^(k-1) <= v < 10^k where -// v = f * 2^exponent and 2^52 <= f < 2^53. -// v is hence a normalized double with the given exponent. The output is an -// approximation for the exponent of the decimal approximation .digits * 10^k. -// -// The result might undershoot by 1 in which case 10^k <= v < 10^(k+1). -// Note: this property holds for v's upper boundary m+ too. -// 10^k <= m+ < 10^(k+1). -// (see explanation below). -// -// Examples: -// EstimatePower(0) => 16 -// EstimatePower(-52) => 0 -// -// Note: e >= 0 => EstimatedPower(e) > 0. No similar claim can be made for e<0. -static int EstimatePower(int exponent) { - // This function estimates log10 of v where v = f*2^e (with e == exponent). - // Note that 10^floor(log10(v)) <= v, but v <= 10^ceil(log10(v)). - // Note that f is bounded by its container size. Let p = 53 (the double's - // significand size). Then 2^(p-1) <= f < 2^p. - // - // Given that log10(v) == log2(v)/log2(10) and e+(len(f)-1) is quite close - // to log2(v) the function is simplified to (e+(len(f)-1))/log2(10). - // The computed number undershoots by less than 0.631 (when we compute log3 - // and not log10). - // - // Optimization: since we only need an approximated result this computation - // can be performed on 64 bit integers. On x86/x64 architecture the speedup is - // not really measurable, though. - // - // Since we want to avoid overshooting we decrement by 1e-10 so that - // floating-point imprecisions don't affect us. - // - // Explanation for v's boundary m+: the computation takes advantage of - // the fact that 2^(p-1) <= f < 2^p. Boundaries still satisfy this requirement - // (even for denormals where the delta can be much more important). - - const double k1Log10 = 0.30102999566398114; // 1/lg(10) - - // For doubles len(f) == 53 (don't forget the hidden bit).
- const int kSignificandSize = Double::kSignificandSize; - double estimate = ceil((exponent + kSignificandSize - 1) * k1Log10 - 1e-10); - return static_cast<int>(estimate); -} - - -// See comments for InitialScaledStartValues. -static void InitialScaledStartValuesPositiveExponent( - uint64_t significand, int exponent, - int estimated_power, bool need_boundary_deltas, - Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus) { - // A positive exponent implies a positive power. - ASSERT(estimated_power >= 0); - // Since the estimated_power is positive we simply multiply the denominator - // by 10^estimated_power. - - // numerator = v. - numerator->AssignUInt64(significand); - numerator->ShiftLeft(exponent); - // denominator = 10^estimated_power. - denominator->AssignPowerUInt16(10, estimated_power); - - if (need_boundary_deltas) { - // Introduce a common denominator so that the deltas to the boundaries are - // integers. - denominator->ShiftLeft(1); - numerator->ShiftLeft(1); - // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common - // denominator (of 2) delta_plus equals 2^e. - delta_plus->AssignUInt16(1); - delta_plus->ShiftLeft(exponent); - // Same for delta_minus. The adjustments if f == 2^p-1 are done later. - delta_minus->AssignUInt16(1); - delta_minus->ShiftLeft(exponent); - } -} - - -// See comments for InitialScaledStartValues -static void InitialScaledStartValuesNegativeExponentPositivePower( - uint64_t significand, int exponent, - int estimated_power, bool need_boundary_deltas, - Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus) { - // v = f * 2^e with e < 0, and with estimated_power >= 0. - // This means that e is close to 0 (have a look at how estimated_power is - // computed). - - // numerator = significand - // since v = significand * 2^exponent this is equivalent to - // numerator = v * 2^-exponent - numerator->AssignUInt64(significand); - // denominator = 10^estimated_power * 2^-exponent (with exponent < 0) - denominator->AssignPowerUInt16(10, estimated_power); - denominator->ShiftLeft(-exponent); - - if (need_boundary_deltas) { - // Introduce a common denominator so that the deltas to the boundaries are - // integers. - denominator->ShiftLeft(1); - numerator->ShiftLeft(1); - // Let v = f * 2^e, then m+ - v = 1/2 * 2^e; With the common - // denominator (of 2) delta_plus equals 2^e. - // Given that the denominator already includes v's exponent the distance - // to the boundaries is simply 1. - delta_plus->AssignUInt16(1); - // Same for delta_minus. The adjustments if f == 2^p-1 are done later. - delta_minus->AssignUInt16(1); - } -} - - -// See comments for InitialScaledStartValues -static void InitialScaledStartValuesNegativeExponentNegativePower( - uint64_t significand, int exponent, - int estimated_power, bool need_boundary_deltas, - Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus) { - // Instead of multiplying the denominator with 10^estimated_power we - // multiply all values (numerator and deltas) by 10^-estimated_power. - - // Use numerator as temporary container for power_ten. - Bignum* power_ten = numerator; - power_ten->AssignPowerUInt16(10, -estimated_power); - - if (need_boundary_deltas) { - // Since power_ten == numerator we must make a copy of 10^estimated_power - // before we complete the computation of the numerator.
- // delta_plus = delta_minus = 10^estimated_power - delta_plus->AssignBignum(*power_ten); - delta_minus->AssignBignum(*power_ten); - } - - // numerator = significand * 2 * 10^-estimated_power - // since v = significand * 2^exponent this is equivalent to - // numerator = v * 10^-estimated_power * 2 * 2^-exponent. - // Remember: numerator has been abused as power_ten. So no need to assign it - // to itself. - ASSERT(numerator == power_ten); - numerator->MultiplyByUInt64(significand); - - // denominator = 2 * 2^-exponent with exponent < 0. - denominator->AssignUInt16(1); - denominator->ShiftLeft(-exponent); - - if (need_boundary_deltas) { - // Introduce a common denominator so that the deltas to the boundaries are - // integers. - numerator->ShiftLeft(1); - denominator->ShiftLeft(1); - // With this shift the boundaries have their correct value, since - // delta_plus = 10^-estimated_power, and - // delta_minus = 10^-estimated_power. - // These assignments have been done earlier. - // The adjustments if f == 2^p-1 (lower boundary is closer) are done later. - } -} - - -// Let v = significand * 2^exponent. -// Computes v / 10^estimated_power exactly, as a ratio of two bignums, numerator -// and denominator. The functions GenerateShortestDigits and -// GenerateCountedDigits will then convert this ratio to its decimal -// representation d, with the required accuracy. -// Then d * 10^estimated_power is the representation of v. -// (Note: the fraction and the estimated_power might get adjusted before -// generating the decimal representation.) -// -// The initial start values consist of: -// - a scaled numerator: s.t. numerator/denominator == v / 10^estimated_power. -// - a scaled (common) denominator. -// optionally (used by GenerateShortestDigits to decide if it has the shortest -// decimal converting back to v): -// - v - m-: the distance to the lower boundary. -// - m+ - v: the distance to the upper boundary. -// -// v, m+, m-, and therefore v - m- and m+ - v all share the same denominator. -// -// Let ep == estimated_power, then the returned values will satisfy: -// v / 10^ep = numerator / denominator. -// v's boundaries m- and m+: -// m- / 10^ep == v / 10^ep - delta_minus / denominator -// m+ / 10^ep == v / 10^ep + delta_plus / denominator -// Or in other words: -// m- == v - delta_minus * 10^ep / denominator; -// m+ == v + delta_plus * 10^ep / denominator; -// -// Since 10^(k-1) <= v < 10^k (with k == estimated_power) -// or 10^k <= v < 10^(k+1) -// we then have 0.1 <= numerator/denominator < 1 -// or 1 <= numerator/denominator < 10 -// -// It is then easy to kickstart the digit-generation routine. -// -// The boundary-deltas are only filled if the mode equals BIGNUM_DTOA_SHORTEST -// or BIGNUM_DTOA_SHORTEST_SINGLE. 
- -static void InitialScaledStartValues(uint64_t significand, - int exponent, - bool lower_boundary_is_closer, - int estimated_power, - bool need_boundary_deltas, - Bignum* numerator, - Bignum* denominator, - Bignum* delta_minus, - Bignum* delta_plus) { - if (exponent >= 0) { - InitialScaledStartValuesPositiveExponent( - significand, exponent, estimated_power, need_boundary_deltas, - numerator, denominator, delta_minus, delta_plus); - } else if (estimated_power >= 0) { - InitialScaledStartValuesNegativeExponentPositivePower( - significand, exponent, estimated_power, need_boundary_deltas, - numerator, denominator, delta_minus, delta_plus); - } else { - InitialScaledStartValuesNegativeExponentNegativePower( - significand, exponent, estimated_power, need_boundary_deltas, - numerator, denominator, delta_minus, delta_plus); - } - - if (need_boundary_deltas && lower_boundary_is_closer) { - // The lower boundary is closer at half the distance of "normal" numbers. - // Increase the common denominator and adapt all but the delta_minus. - denominator->ShiftLeft(1); // *2 - numerator->ShiftLeft(1); // *2 - delta_plus->ShiftLeft(1); // *2 - } -} - - -// This routine multiplies numerator/denominator so that its values lies in the -// range 1-10. That is after a call to this function we have: -// 1 <= (numerator + delta_plus) /denominator < 10. -// Let numerator the input before modification and numerator' the argument -// after modification, then the output-parameter decimal_point is such that -// numerator / denominator * 10^estimated_power == -// numerator' / denominator' * 10^(decimal_point - 1) -// In some cases estimated_power was too low, and this is already the case. We -// then simply adjust the power so that 10^(k-1) <= v < 10^k (with k == -// estimated_power) but do not touch the numerator or denominator. -// Otherwise the routine multiplies the numerator and the deltas by 10. -static void FixupMultiply10(int estimated_power, bool is_even, - int* decimal_point, - Bignum* numerator, Bignum* denominator, - Bignum* delta_minus, Bignum* delta_plus) { - bool in_range; - if (is_even) { - // For IEEE doubles half-way cases (in decimal system numbers ending with 5) - // are rounded to the closest floating-point number with even significand. - in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) >= 0; - } else { - in_range = Bignum::PlusCompare(*numerator, *delta_plus, *denominator) > 0; - } - if (in_range) { - // Since numerator + delta_plus >= denominator we already have - // 1 <= numerator/denominator < 10. Simply update the estimated_power. - *decimal_point = estimated_power + 1; - } else { - *decimal_point = estimated_power; - numerator->Times10(); - if (Bignum::Equal(*delta_minus, *delta_plus)) { - delta_minus->Times10(); - delta_plus->AssignBignum(*delta_minus); - } else { - delta_minus->Times10(); - delta_plus->Times10(); - } - } -} - -} // namespace double_conversion diff --git a/base/poco/Foundation/src/bignum-dtoa.h b/base/poco/Foundation/src/bignum-dtoa.h deleted file mode 100644 index 6ee62aa23df..00000000000 --- a/base/poco/Foundation/src/bignum-dtoa.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_BIGNUM_DTOA_H_ -#define DOUBLE_CONVERSION_BIGNUM_DTOA_H_ - -#include "utils.h" - -namespace double_conversion -{ - -enum BignumDtoaMode -{ - // Return the shortest correct representation. - // For example the output of 0.299999999999999988897 is (the less accurate but - // correct) 0.3. - BIGNUM_DTOA_SHORTEST, - // Same as BIGNUM_DTOA_SHORTEST but for single-precision floats. - BIGNUM_DTOA_SHORTEST_SINGLE, - // Return a fixed number of digits after the decimal point. - // For instance fixed(0.1, 4) becomes 0.1000 - // If the input number is big, the output will be big. - BIGNUM_DTOA_FIXED, - // Return a fixed number of digits, no matter what the exponent is. - BIGNUM_DTOA_PRECISION -}; - -// Converts the given double 'v' to ascii. -// The result should be interpreted as buffer * 10^(point-length). -// The buffer will be null-terminated. -// -// The input v must be > 0 and different from NaN, and Infinity. -// -// The output depends on the given mode: -// - SHORTEST: produce the least amount of digits for which the internal -// identity requirement is still satisfied. If the digits are printed -// (together with the correct exponent) then reading this number will give -// 'v' again. The buffer will choose the representation that is closest to -// 'v'. If there are two at the same distance, than the number is round up. -// In this mode the 'requested_digits' parameter is ignored. -// - FIXED: produces digits necessary to print a given number with -// 'requested_digits' digits after the decimal point. The produced digits -// might be too short in which case the caller has to fill the gaps with '0's. -// Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2. -// Halfway cases are rounded up. The call toFixed(0.15, 2) thus returns -// buffer="2", point=0. -// Note: the length of the returned buffer has no meaning wrt the significance -// of its digits. That is, just because it contains '0's does not mean that -// any other digit would not satisfy the internal identity requirement. -// - PRECISION: produces 'requested_digits' where the first digit is not '0'. -// Even though the length of produced digits usually equals -// 'requested_digits', the function is allowed to return fewer digits, in -// which case the caller has to fill the missing digits with '0's. 
-// Halfway cases are again rounded up. -// 'BignumDtoa' expects the given buffer to be big enough to hold all digits -// and a terminating null-character. -void BignumDtoa(double v, BignumDtoaMode mode, int requested_digits, Vector<char> buffer, int * length, int * point); - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_BIGNUM_DTOA_H_ diff --git a/base/poco/Foundation/src/bignum.cc b/base/poco/Foundation/src/bignum.cc deleted file mode 100644 index 2743d67e8d9..00000000000 --- a/base/poco/Foundation/src/bignum.cc +++ /dev/null @@ -1,766 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "bignum.h" -#include "utils.h" - -namespace double_conversion { - -Bignum::Bignum() - : bigits_(bigits_buffer_, kBigitCapacity), used_digits_(0), exponent_(0) { - for (int i = 0; i < kBigitCapacity; ++i) { - bigits_[i] = 0; - } -} - - -template <typename S> -static int BitSize(S value) { - (void) value; // Mark variable as used. - return 8 * sizeof(value); -} - -// Guaranteed to lie in one Bigit. -void Bignum::AssignUInt16(uint16_t value) { - ASSERT(kBigitSize >= BitSize(value)); - Zero(); - if (value == 0) return; - - EnsureCapacity(1); - bigits_[0] = value; - used_digits_ = 1; -} - - -void Bignum::AssignUInt64(uint64_t value) { - const int kUInt64Size = 64; - - Zero(); - if (value == 0) return; - - int needed_bigits = kUInt64Size / kBigitSize + 1; - EnsureCapacity(needed_bigits); - for (int i = 0; i < needed_bigits; ++i) { - bigits_[i] = value & kBigitMask; - value = value >> kBigitSize; - } - used_digits_ = needed_bigits; - Clamp(); -} - - -void Bignum::AssignBignum(const Bignum& other) { - exponent_ = other.exponent_; - for (int i = 0; i < other.used_digits_; ++i) { - bigits_[i] = other.bigits_[i]; - } - // Clear the excess digits (if there were any).
- for (int i = other.used_digits_; i < used_digits_; ++i) { - bigits_[i] = 0; - } - used_digits_ = other.used_digits_; -} - - -static uint64_t ReadUInt64(Vector<const char> buffer, - int from, - int digits_to_read) { - uint64_t result = 0; - for (int i = from; i < from + digits_to_read; ++i) { - int digit = buffer[i] - '0'; - ASSERT(0 <= digit && digit <= 9); - result = result * 10 + digit; - } - return result; -} - - -void Bignum::AssignDecimalString(Vector<const char> value) { - // 2^64 = 18446744073709551616 > 10^19 - const int kMaxUint64DecimalDigits = 19; - Zero(); - int length = value.length(); - int pos = 0; - // Let's just say that each digit needs 4 bits. - while (length >= kMaxUint64DecimalDigits) { - uint64_t digits = ReadUInt64(value, pos, kMaxUint64DecimalDigits); - pos += kMaxUint64DecimalDigits; - length -= kMaxUint64DecimalDigits; - MultiplyByPowerOfTen(kMaxUint64DecimalDigits); - AddUInt64(digits); - } - uint64_t digits = ReadUInt64(value, pos, length); - MultiplyByPowerOfTen(length); - AddUInt64(digits); - Clamp(); -} - - -static int HexCharValue(char c) { - if ('0' <= c && c <= '9') return c - '0'; - if ('a' <= c && c <= 'f') return 10 + c - 'a'; - ASSERT('A' <= c && c <= 'F'); - return 10 + c - 'A'; -} - - -void Bignum::AssignHexString(Vector<const char> value) { - Zero(); - int length = value.length(); - - int needed_bigits = length * 4 / kBigitSize + 1; - EnsureCapacity(needed_bigits); - int string_index = length - 1; - for (int i = 0; i < needed_bigits - 1; ++i) { - // These bigits are guaranteed to be "full". - Chunk current_bigit = 0; - for (int j = 0; j < kBigitSize / 4; j++) { - current_bigit += HexCharValue(value[string_index--]) << (j * 4); - } - bigits_[i] = current_bigit; - } - used_digits_ = needed_bigits - 1; - - Chunk most_significant_bigit = 0; // Could be = 0; - for (int j = 0; j <= string_index; ++j) { - most_significant_bigit <<= 4; - most_significant_bigit += HexCharValue(value[j]); - } - if (most_significant_bigit != 0) { - bigits_[used_digits_] = most_significant_bigit; - used_digits_++; - } - Clamp(); -} - - -void Bignum::AddUInt64(uint64_t operand) { - if (operand == 0) return; - Bignum other; - other.AssignUInt64(operand); - AddBignum(other); -} - - -void Bignum::AddBignum(const Bignum& other) { - ASSERT(IsClamped()); - ASSERT(other.IsClamped()); - - // If this has a greater exponent than other append zero-bigits to this. - // After this call exponent_ <= other.exponent_. - Align(other); - - // There are two possibilities: - // aaaaaaaaaaa 0000 (where the 0s represent a's exponent) - // bbbbb 00000000 - // ---------------- - // ccccccccccc 0000 - // or - // aaaaaaaaaa 0000 - // bbbbbbbbb 0000000 - // ----------------- - // cccccccccccc 0000 - // In both cases we might need a carry bigit. - - EnsureCapacity(1 + Max(BigitLength(), other.BigitLength()) - exponent_); - Chunk carry = 0; - int bigit_pos = other.exponent_ - exponent_; - ASSERT(bigit_pos >= 0); - for (int i = 0; i < other.used_digits_; ++i) { - Chunk sum = bigits_[bigit_pos] + other.bigits_[i] + carry; - bigits_[bigit_pos] = sum & kBigitMask; - carry = sum >> kBigitSize; - bigit_pos++; - } - - while (carry != 0) { - Chunk sum = bigits_[bigit_pos] + carry; - bigits_[bigit_pos] = sum & kBigitMask; - carry = sum >> kBigitSize; - bigit_pos++; - } - used_digits_ = Max(bigit_pos, used_digits_); - ASSERT(IsClamped()); -} - - -void Bignum::SubtractBignum(const Bignum& other) { - ASSERT(IsClamped()); - ASSERT(other.IsClamped()); - // We require this to be bigger than other.
- ASSERT(LessEqual(other, *this)); - - Align(other); - - int offset = other.exponent_ - exponent_; - Chunk borrow = 0; - int i; - for (i = 0; i < other.used_digits_; ++i) { - ASSERT((borrow == 0) || (borrow == 1)); - Chunk difference = bigits_[i + offset] - other.bigits_[i] - borrow; - bigits_[i + offset] = difference & kBigitMask; - borrow = difference >> (kChunkSize - 1); - } - while (borrow != 0) { - Chunk difference = bigits_[i + offset] - borrow; - bigits_[i + offset] = difference & kBigitMask; - borrow = difference >> (kChunkSize - 1); - ++i; - } - Clamp(); -} - - -void Bignum::ShiftLeft(int shift_amount) { - if (used_digits_ == 0) return; - exponent_ += shift_amount / kBigitSize; - int local_shift = shift_amount % kBigitSize; - EnsureCapacity(used_digits_ + 1); - BigitsShiftLeft(local_shift); -} - - -void Bignum::MultiplyByUInt32(uint32_t factor) { - if (factor == 1) return; - if (factor == 0) { - Zero(); - return; - } - if (used_digits_ == 0) return; - - // The product of a bigit with the factor is of size kBigitSize + 32. - // Assert that this number + 1 (for the carry) fits into double chunk. - ASSERT(kDoubleChunkSize >= kBigitSize + 32 + 1); - DoubleChunk carry = 0; - for (int i = 0; i < used_digits_; ++i) { - DoubleChunk product = static_cast<DoubleChunk>(factor) * bigits_[i] + carry; - bigits_[i] = static_cast<Chunk>(product & kBigitMask); - carry = (product >> kBigitSize); - } - while (carry != 0) { - EnsureCapacity(used_digits_ + 1); - bigits_[used_digits_] = carry & kBigitMask; - used_digits_++; - carry >>= kBigitSize; - } -} - - -void Bignum::MultiplyByUInt64(uint64_t factor) { - if (factor == 1) return; - if (factor == 0) { - Zero(); - return; - } - ASSERT(kBigitSize < 32); - uint64_t carry = 0; - uint64_t low = factor & 0xFFFFFFFF; - uint64_t high = factor >> 32; - for (int i = 0; i < used_digits_; ++i) { - uint64_t product_low = low * bigits_[i]; - uint64_t product_high = high * bigits_[i]; - uint64_t tmp = (carry & kBigitMask) + product_low; - bigits_[i] = tmp & kBigitMask; - carry = (carry >> kBigitSize) + (tmp >> kBigitSize) + - (product_high << (32 - kBigitSize)); - } - while (carry != 0) { - EnsureCapacity(used_digits_ + 1); - bigits_[used_digits_] = carry & kBigitMask; - used_digits_++; - carry >>= kBigitSize; - } -} - - -void Bignum::MultiplyByPowerOfTen(int exponent) { - const uint64_t kFive27 = UINT64_2PART_C(0x6765c793, fa10079d); - const uint16_t kFive1 = 5; - const uint16_t kFive2 = kFive1 * 5; - const uint16_t kFive3 = kFive2 * 5; - const uint16_t kFive4 = kFive3 * 5; - const uint16_t kFive5 = kFive4 * 5; - const uint16_t kFive6 = kFive5 * 5; - const uint32_t kFive7 = kFive6 * 5; - const uint32_t kFive8 = kFive7 * 5; - const uint32_t kFive9 = kFive8 * 5; - const uint32_t kFive10 = kFive9 * 5; - const uint32_t kFive11 = kFive10 * 5; - const uint32_t kFive12 = kFive11 * 5; - const uint32_t kFive13 = kFive12 * 5; - const uint32_t kFive1_to_12[] = - { kFive1, kFive2, kFive3, kFive4, kFive5, kFive6, - kFive7, kFive8, kFive9, kFive10, kFive11, kFive12 }; - - ASSERT(exponent >= 0); - if (exponent == 0) return; - if (used_digits_ == 0) return; - - // We shift by exponent at the end just before returning.
- int remaining_exponent = exponent; - while (remaining_exponent >= 27) { - MultiplyByUInt64(kFive27); - remaining_exponent -= 27; - } - while (remaining_exponent >= 13) { - MultiplyByUInt32(kFive13); - remaining_exponent -= 13; - } - if (remaining_exponent > 0) { - MultiplyByUInt32(kFive1_to_12[remaining_exponent - 1]); - } - ShiftLeft(exponent); -} - - -void Bignum::Square() { - ASSERT(IsClamped()); - int product_length = 2 * used_digits_; - EnsureCapacity(product_length); - - // Comba multiplication: compute each column separately. - // Example: r = a2a1a0 * b2b1b0. - // r = 1 * a0b0 + - // 10 * (a1b0 + a0b1) + - // 100 * (a2b0 + a1b1 + a0b2) + - // 1000 * (a2b1 + a1b2) + - // 10000 * a2b2 - // - // In the worst case we have to accumulate nb-digits products of digit*digit. - // - // Assert that the additional number of bits in a DoubleChunk are enough to - // sum up used_digits of Bigit*Bigit. - if ((1 << (2 * (kChunkSize - kBigitSize))) <= used_digits_) { - UNIMPLEMENTED(); - } - DoubleChunk accumulator = 0; - // First shift the digits so we don't overwrite them. - int copy_offset = used_digits_; - for (int i = 0; i < used_digits_; ++i) { - bigits_[copy_offset + i] = bigits_[i]; - } - // We have two loops to avoid some 'if's in the loop. - for (int i = 0; i < used_digits_; ++i) { - // Process temporary digit i with power i. - // The sum of the two indices must be equal to i. - int bigit_index1 = i; - int bigit_index2 = 0; - // Sum all of the sub-products. - while (bigit_index1 >= 0) { - Chunk chunk1 = bigits_[copy_offset + bigit_index1]; - Chunk chunk2 = bigits_[copy_offset + bigit_index2]; - accumulator += static_cast<DoubleChunk>(chunk1) * chunk2; - bigit_index1--; - bigit_index2++; - } - bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask; - accumulator >>= kBigitSize; - } - for (int i = used_digits_; i < product_length; ++i) { - int bigit_index1 = used_digits_ - 1; - int bigit_index2 = i - bigit_index1; - // Invariant: sum of both indices is again equal to i. - // Inner loop runs 0 times on last iteration, emptying accumulator. - while (bigit_index2 < used_digits_) { - Chunk chunk1 = bigits_[copy_offset + bigit_index1]; - Chunk chunk2 = bigits_[copy_offset + bigit_index2]; - accumulator += static_cast<DoubleChunk>(chunk1) * chunk2; - bigit_index1--; - bigit_index2++; - } - // The overwritten bigits_[i] will never be read in further loop iterations, - // because bigit_index1 and bigit_index2 are always greater - // than i - used_digits_. - bigits_[i] = static_cast<Chunk>(accumulator) & kBigitMask; - accumulator >>= kBigitSize; - } - // Since the result was guaranteed to lie inside the number the - // accumulator must be 0 now. - ASSERT(accumulator == 0); - - // Don't forget to update the used_digits and the exponent. - used_digits_ = product_length; - exponent_ *= 2; - Clamp(); -} - - -void Bignum::AssignPowerUInt16(uint16_t base, int power_exponent) { - ASSERT(base != 0); - ASSERT(power_exponent >= 0); - if (power_exponent == 0) { - AssignUInt16(1); - return; - } - Zero(); - int shifts = 0; - // We expect base to be in range 2-32, and most often to be 10. - // It does not make much sense to implement different algorithms for counting - // the bits. - while ((base & 1) == 0) { - base >>= 1; - shifts++; - } - int bit_size = 0; - int tmp_base = base; - while (tmp_base != 0) { - tmp_base >>= 1; - bit_size++; - } - int final_size = bit_size * power_exponent; - // 1 extra bigit for the shifting, and one for rounded final_size. - EnsureCapacity(final_size / kBigitSize + 2); - - // Left to Right exponentiation.
- int mask = 1; - while (power_exponent >= mask) mask <<= 1; - - // The mask is now pointing to the bit above the most significant 1-bit of - // power_exponent. - // Get rid of first 1-bit; - mask >>= 2; - uint64_t this_value = base; - - bool delayed_multiplication = false; - const uint64_t max_32bits = 0xFFFFFFFF; - while (mask != 0 && this_value <= max_32bits) { - this_value = this_value * this_value; - // Verify that there is enough space in this_value to perform the - // multiplication. The first bit_size bits must be 0. - if ((power_exponent & mask) != 0) { - uint64_t base_bits_mask = - ~((static_cast<uint64_t>(1) << (64 - bit_size)) - 1); - bool high_bits_zero = (this_value & base_bits_mask) == 0; - if (high_bits_zero) { - this_value *= base; - } else { - delayed_multiplication = true; - } - } - mask >>= 1; - } - AssignUInt64(this_value); - if (delayed_multiplication) { - MultiplyByUInt32(base); - } - - // Now do the same thing as a bignum. - while (mask != 0) { - Square(); - if ((power_exponent & mask) != 0) { - MultiplyByUInt32(base); - } - mask >>= 1; - } - - // And finally add the saved shifts. - ShiftLeft(shifts * power_exponent); -} - - -// Precondition: this/other < 16bit. -uint16_t Bignum::DivideModuloIntBignum(const Bignum& other) { - ASSERT(IsClamped()); - ASSERT(other.IsClamped()); - ASSERT(other.used_digits_ > 0); - - // Easy case: if we have fewer digits than the divisor, then the result is 0. - // Note: this handles the case where this == 0, too. - if (BigitLength() < other.BigitLength()) { - return 0; - } - - Align(other); - - uint16_t result = 0; - - // Start by removing multiples of 'other' until both numbers have the same - // number of digits. - while (BigitLength() > other.BigitLength()) { - // This naive approach is extremely inefficient if `this` divided by other - // is big. This function is implemented for doubleToString where - // the result should be small (less than 10). - ASSERT(other.bigits_[other.used_digits_ - 1] >= ((1 << kBigitSize) / 16)); - ASSERT(bigits_[used_digits_ - 1] < 0x10000); - // Remove the multiples of the first digit. - // Example this = 23 and other equals 9. -> Remove 2 multiples. - result += static_cast<uint16_t>(bigits_[used_digits_ - 1]); - SubtractTimes(other, bigits_[used_digits_ - 1]); - } - - ASSERT(BigitLength() == other.BigitLength()); - - // Both bignums are at the same length now. - // Since other has more than 0 digits we know that the access to - // bigits_[used_digits_ - 1] is safe. - Chunk this_bigit = bigits_[used_digits_ - 1]; - Chunk other_bigit = other.bigits_[other.used_digits_ - 1]; - - if (other.used_digits_ == 1) { - // Shortcut for easy (and common) case. - int quotient = this_bigit / other_bigit; - bigits_[used_digits_ - 1] = this_bigit - other_bigit * quotient; - ASSERT(quotient < 0x10000); - result += static_cast<uint16_t>(quotient); - Clamp(); - return result; - } - - int division_estimate = this_bigit / (other_bigit + 1); - ASSERT(division_estimate < 0x10000); - result += static_cast<uint16_t>(division_estimate); - SubtractTimes(other, division_estimate); - - if (other_bigit * (division_estimate + 1) > this_bigit) { - // No need to even try to subtract. Even if other's remaining digits were 0 - // another subtraction would be too much.
- return result; - } - - while (LessEqual(other, *this)) { - SubtractBignum(other); - result++; - } - return result; -} - - -template <typename S> -static int SizeInHexChars(S number) { - ASSERT(number > 0); - int result = 0; - while (number != 0) { - number >>= 4; - result++; - } - return result; -} - - -static char HexCharOfValue(int value) { - ASSERT(0 <= value && value <= 16); - if (value < 10) return static_cast<char>(value + '0'); - return static_cast<char>(value - 10 + 'A'); -} - - -bool Bignum::ToHexString(char* buffer, int buffer_size) const { - ASSERT(IsClamped()); - // Each bigit must be printable as separate hex-character. - ASSERT(kBigitSize % 4 == 0); - const int kHexCharsPerBigit = kBigitSize / 4; - - if (used_digits_ == 0) { - if (buffer_size < 2) return false; - buffer[0] = '0'; - buffer[1] = '\0'; - return true; - } - // We add 1 for the terminating '\0' character. - int needed_chars = (BigitLength() - 1) * kHexCharsPerBigit + - SizeInHexChars(bigits_[used_digits_ - 1]) + 1; - if (needed_chars > buffer_size) return false; - int string_index = needed_chars - 1; - buffer[string_index--] = '\0'; - for (int i = 0; i < exponent_; ++i) { - for (int j = 0; j < kHexCharsPerBigit; ++j) { - buffer[string_index--] = '0'; - } - } - for (int i = 0; i < used_digits_ - 1; ++i) { - Chunk current_bigit = bigits_[i]; - for (int j = 0; j < kHexCharsPerBigit; ++j) { - buffer[string_index--] = HexCharOfValue(current_bigit & 0xF); - current_bigit >>= 4; - } - } - // And finally the last bigit. - Chunk most_significant_bigit = bigits_[used_digits_ - 1]; - while (most_significant_bigit != 0) { - buffer[string_index--] = HexCharOfValue(most_significant_bigit & 0xF); - most_significant_bigit >>= 4; - } - return true; -} - - -Bignum::Chunk Bignum::BigitAt(int index) const { - if (index >= BigitLength()) return 0; - if (index < exponent_) return 0; - return bigits_[index - exponent_]; -} - - -int Bignum::Compare(const Bignum& a, const Bignum& b) { - ASSERT(a.IsClamped()); - ASSERT(b.IsClamped()); - int bigit_length_a = a.BigitLength(); - int bigit_length_b = b.BigitLength(); - if (bigit_length_a < bigit_length_b) return -1; - if (bigit_length_a > bigit_length_b) return +1; - for (int i = bigit_length_a - 1; i >= Min(a.exponent_, b.exponent_); --i) { - Chunk bigit_a = a.BigitAt(i); - Chunk bigit_b = b.BigitAt(i); - if (bigit_a < bigit_b) return -1; - if (bigit_a > bigit_b) return +1; - // Otherwise they are equal up to this digit. Try the next digit. - } - return 0; -} - - -int Bignum::PlusCompare(const Bignum& a, const Bignum& b, const Bignum& c) { - ASSERT(a.IsClamped()); - ASSERT(b.IsClamped()); - ASSERT(c.IsClamped()); - if (a.BigitLength() < b.BigitLength()) { - return PlusCompare(b, a, c); - } - if (a.BigitLength() + 1 < c.BigitLength()) return -1; - if (a.BigitLength() > c.BigitLength()) return +1; - // The exponent encodes 0-bigits. So if there are more 0-digits in 'a' than - // 'b' has digits, then the bigit-length of 'a'+'b' must be equal to the one - // of 'a'. - if (a.exponent_ >= b.BigitLength() && a.BigitLength() < c.BigitLength()) { - return -1; - } - - Chunk borrow = 0; - // Starting at min_exponent all digits are == 0. So no need to compare them.
- int min_exponent = Min(Min(a.exponent_, b.exponent_), c.exponent_); - for (int i = c.BigitLength() - 1; i >= min_exponent; --i) { - Chunk chunk_a = a.BigitAt(i); - Chunk chunk_b = b.BigitAt(i); - Chunk chunk_c = c.BigitAt(i); - Chunk sum = chunk_a + chunk_b; - if (sum > chunk_c + borrow) { - return +1; - } else { - borrow = chunk_c + borrow - sum; - if (borrow > 1) return -1; - borrow <<= kBigitSize; - } - } - if (borrow == 0) return 0; - return -1; -} - - -void Bignum::Clamp() { - while (used_digits_ > 0 && bigits_[used_digits_ - 1] == 0) { - used_digits_--; - } - if (used_digits_ == 0) { - // Zero. - exponent_ = 0; - } -} - - -bool Bignum::IsClamped() const { - return used_digits_ == 0 || bigits_[used_digits_ - 1] != 0; -} - - -void Bignum::Zero() { - for (int i = 0; i < used_digits_; ++i) { - bigits_[i] = 0; - } - used_digits_ = 0; - exponent_ = 0; -} - - -void Bignum::Align(const Bignum& other) { - if (exponent_ > other.exponent_) { - // If "X" represents a "hidden" digit (by the exponent) then we are in the - // following case (a == this, b == other): - // a: aaaaaaXXXX or a: aaaaaXXX - // b: bbbbbbX b: bbbbbbbbXX - // We replace some of the hidden digits (X) of a with 0 digits. - // a: aaaaaa000X or a: aaaaa0XX - int zero_digits = exponent_ - other.exponent_; - EnsureCapacity(used_digits_ + zero_digits); - for (int i = used_digits_ - 1; i >= 0; --i) { - bigits_[i + zero_digits] = bigits_[i]; - } - for (int i = 0; i < zero_digits; ++i) { - bigits_[i] = 0; - } - used_digits_ += zero_digits; - exponent_ -= zero_digits; - ASSERT(used_digits_ >= 0); - ASSERT(exponent_ >= 0); - } -} - - -void Bignum::BigitsShiftLeft(int shift_amount) { - ASSERT(shift_amount < kBigitSize); - ASSERT(shift_amount >= 0); - Chunk carry = 0; - for (int i = 0; i < used_digits_; ++i) { - Chunk new_carry = bigits_[i] >> (kBigitSize - shift_amount); - bigits_[i] = ((bigits_[i] << shift_amount) + carry) & kBigitMask; - carry = new_carry; - } - if (carry != 0) { - bigits_[used_digits_] = carry; - used_digits_++; - } -} - - -void Bignum::SubtractTimes(const Bignum& other, int factor) { - ASSERT(exponent_ <= other.exponent_); - if (factor < 3) { - for (int i = 0; i < factor; ++i) { - SubtractBignum(other); - } - return; - } - Chunk borrow = 0; - int exponent_diff = other.exponent_ - exponent_; - for (int i = 0; i < other.used_digits_; ++i) { - DoubleChunk product = static_cast<DoubleChunk>(factor) * other.bigits_[i]; - DoubleChunk remove = borrow + product; - Chunk difference = bigits_[i + exponent_diff] - (remove & kBigitMask); - bigits_[i + exponent_diff] = difference & kBigitMask; - borrow = static_cast<Chunk>((difference >> (kChunkSize - 1)) + - (remove >> kBigitSize)); - } - for (int i = other.used_digits_ + exponent_diff; i < used_digits_; ++i) { - if (borrow == 0) return; - Chunk difference = bigits_[i] - borrow; - bigits_[i] = difference & kBigitMask; - borrow = difference >> (kChunkSize - 1); - } - Clamp(); -} - - -} // namespace double_conversion diff --git a/base/poco/Foundation/src/bignum.h b/base/poco/Foundation/src/bignum.h deleted file mode 100644 index ec56adac23f..00000000000 --- a/base/poco/Foundation/src/bignum.h +++ /dev/null @@ -1,138 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_BIGNUM_H_ -#define DOUBLE_CONVERSION_BIGNUM_H_ - -#include "utils.h" - -namespace double_conversion -{ - -class Bignum -{ -public: - // 3584 = 128 * 28. We can represent 2^3584 > 10^1000 accurately. - // This bignum can encode much bigger numbers, since it contains an - // exponent. - static const int kMaxSignificantBits = 3584; - - Bignum(); - void AssignUInt16(uint16_t value); - void AssignUInt64(uint64_t value); - void AssignBignum(const Bignum & other); - - void AssignDecimalString(Vector<const char> value); - void AssignHexString(Vector<const char> value); - - void AssignPowerUInt16(uint16_t base, int exponent); - - void AddUInt16(uint16_t operand); - void AddUInt64(uint64_t operand); - void AddBignum(const Bignum & other); - // Precondition: this >= other. - void SubtractBignum(const Bignum & other); - - void Square(); - void ShiftLeft(int shift_amount); - void MultiplyByUInt32(uint32_t factor); - void MultiplyByUInt64(uint64_t factor); - void MultiplyByPowerOfTen(int exponent); - void Times10() { return MultiplyByUInt32(10); } - // Pseudocode: - // int result = this / other; - // this = this % other; - // In the worst case this function is in O(this/other). - uint16_t DivideModuloIntBignum(const Bignum & other); - - bool ToHexString(char * buffer, int buffer_size) const; - - // Returns - // -1 if a < b, - // 0 if a == b, and - // +1 if a > b.
- static int Compare(const Bignum & a, const Bignum & b); - static bool Equal(const Bignum & a, const Bignum & b) { return Compare(a, b) == 0; } - static bool LessEqual(const Bignum & a, const Bignum & b) { return Compare(a, b) <= 0; } - static bool Less(const Bignum & a, const Bignum & b) { return Compare(a, b) < 0; } - // Returns Compare(a + b, c); - static int PlusCompare(const Bignum & a, const Bignum & b, const Bignum & c); - // Returns a + b == c - static bool PlusEqual(const Bignum & a, const Bignum & b, const Bignum & c) { return PlusCompare(a, b, c) == 0; } - // Returns a + b <= c - static bool PlusLessEqual(const Bignum & a, const Bignum & b, const Bignum & c) { return PlusCompare(a, b, c) <= 0; } - // Returns a + b < c - static bool PlusLess(const Bignum & a, const Bignum & b, const Bignum & c) { return PlusCompare(a, b, c) < 0; } - -private: - typedef uint32_t Chunk; - typedef uint64_t DoubleChunk; - - static const int kChunkSize = sizeof(Chunk) * 8; - static const int kDoubleChunkSize = sizeof(DoubleChunk) * 8; - // With bigit size of 28 we lose some bits, but a double still fits easily - // into two chunks, and more importantly we can use the Comba multiplication. - static const int kBigitSize = 28; - static const Chunk kBigitMask = (1 << kBigitSize) - 1; - // Every instance allocates kBigitLength chunks on the stack. Bignums cannot - // grow. There are no checks if the stack-allocated space is sufficient. - static const int kBigitCapacity = kMaxSignificantBits / kBigitSize; - - void EnsureCapacity(int size) - { - if (size > kBigitCapacity) - { - UNREACHABLE(); - } - } - void Align(const Bignum & other); - void Clamp(); - bool IsClamped() const; - void Zero(); - // Requires this to have enough capacity (no tests done). - // Updates used_digits_ if necessary. - // shift_amount must be < kBigitSize. - void BigitsShiftLeft(int shift_amount); - // BigitLength includes the "hidden" digits encoded in the exponent. - int BigitLength() const { return used_digits_ + exponent_; } - Chunk BigitAt(int index) const; - void SubtractTimes(const Bignum & other, int factor); - - Chunk bigits_buffer_[kBigitCapacity]; - // A vector backed by bigits_buffer_. This way accesses to the array are - // checked for out-of-bounds errors. - Vector<Chunk> bigits_; - int used_digits_; - // The Bignum's value equals value(bigits_) * 2^(exponent_ * kBigitSize). - int exponent_; - - DISALLOW_COPY_AND_ASSIGN(Bignum); -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_BIGNUM_H_ diff --git a/base/poco/Foundation/src/cached-powers.cc b/base/poco/Foundation/src/cached-powers.cc deleted file mode 100644 index d1359ffe43e..00000000000 --- a/base/poco/Foundation/src/cached-powers.cc +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2006-2008 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission.
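As an aside, the 19-digit chunking in AssignDecimalString above works because 10^19 - 1 still fits in a uint64_t. A standalone sketch of that limb accumulation (illustration only, not the library's code):

    #include <cassert>
    #include <cstdint>
    #include <string>

    // Accumulate n decimal digits of s, starting at 'from', into one 64-bit limb.
    static uint64_t ReadLimbSketch(const std::string & s, size_t from, size_t n) {
        uint64_t result = 0;
        for (size_t i = from; i < from + n; ++i)
            result = result * 10 + static_cast<uint64_t>(s[i] - '0');
        return result;
    }

    int main() {
        // 10^19 - 1 = 9999999999999999999 < 2^64 - 1, so 19 digits always fit.
        assert(ReadLimbSketch(std::string(19, '9'), 0, 19) == 9999999999999999999ULL);
        return 0;
    }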
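The estimate-then-correct step in DivideModuloIntBignum is easier to see on plain machine words. A toy sketch (assumption: a single unsigned word stands in for a bignum, and the divisor is small, mirroring the "this/other < 16bit" precondition):

    #include <cassert>
    #include <cstdint>

    // Divide self by other, returning the quotient and leaving the remainder
    // in self. Dividing by (other + 1) can only under-estimate the quotient,
    // so a short subtraction loop finishes the job.
    static uint32_t DivideModuloSketch(uint32_t & self, uint32_t other) {
        uint32_t result = self / (other + 1);  // never too large
        self -= result * other;
        while (self >= other) {  // correct the under-estimate
            self -= other;
            ++result;
        }
        return result;
    }

    int main() {
        uint32_t v = 23;
        assert(DivideModuloSketch(v, 9) == 2 && v == 5);  // 23 = 2 * 9 + 5
        return 0;
    }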
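All of the Equal/LessEqual/Plus* helpers declared around this point reduce to two three-way primitives; on plain integers the contract looks like this (illustration only -- the bignum version never materializes a + b, it compares digit columns with a borrow instead):

    #include <cassert>
    #include <cstdint>

    static int CompareSketch(uint64_t a, uint64_t b) {
        return a < b ? -1 : (a > b ? +1 : 0);
    }

    // Returns Compare(a + b, c), the contract of Bignum::PlusCompare.
    static int PlusCompareSketch(uint64_t a, uint64_t b, uint64_t c) {
        return CompareSketch(a + b, c);
    }

    int main() {
        assert(CompareSketch(2, 3) == -1);
        assert(PlusCompareSketch(2, 3, 5) == 0);  // 2 + 3 == 5
        return 0;
    }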
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include -#include - -#include "utils.h" - -#include "cached-powers.h" - -namespace double_conversion { - -struct CachedPower { - uint64_t significand; - int16_t binary_exponent; - int16_t decimal_exponent; -}; - -static const CachedPower kCachedPowers[] = { - {UINT64_2PART_C(0xfa8fd5a0, 081c0288), -1220, -348}, - {UINT64_2PART_C(0xbaaee17f, a23ebf76), -1193, -340}, - {UINT64_2PART_C(0x8b16fb20, 3055ac76), -1166, -332}, - {UINT64_2PART_C(0xcf42894a, 5dce35ea), -1140, -324}, - {UINT64_2PART_C(0x9a6bb0aa, 55653b2d), -1113, -316}, - {UINT64_2PART_C(0xe61acf03, 3d1a45df), -1087, -308}, - {UINT64_2PART_C(0xab70fe17, c79ac6ca), -1060, -300}, - {UINT64_2PART_C(0xff77b1fc, bebcdc4f), -1034, -292}, - {UINT64_2PART_C(0xbe5691ef, 416bd60c), -1007, -284}, - {UINT64_2PART_C(0x8dd01fad, 907ffc3c), -980, -276}, - {UINT64_2PART_C(0xd3515c28, 31559a83), -954, -268}, - {UINT64_2PART_C(0x9d71ac8f, ada6c9b5), -927, -260}, - {UINT64_2PART_C(0xea9c2277, 23ee8bcb), -901, -252}, - {UINT64_2PART_C(0xaecc4991, 4078536d), -874, -244}, - {UINT64_2PART_C(0x823c1279, 5db6ce57), -847, -236}, - {UINT64_2PART_C(0xc2109436, 4dfb5637), -821, -228}, - {UINT64_2PART_C(0x9096ea6f, 3848984f), -794, -220}, - {UINT64_2PART_C(0xd77485cb, 25823ac7), -768, -212}, - {UINT64_2PART_C(0xa086cfcd, 97bf97f4), -741, -204}, - {UINT64_2PART_C(0xef340a98, 172aace5), -715, -196}, - {UINT64_2PART_C(0xb23867fb, 2a35b28e), -688, -188}, - {UINT64_2PART_C(0x84c8d4df, d2c63f3b), -661, -180}, - {UINT64_2PART_C(0xc5dd4427, 1ad3cdba), -635, -172}, - {UINT64_2PART_C(0x936b9fce, bb25c996), -608, -164}, - {UINT64_2PART_C(0xdbac6c24, 7d62a584), -582, -156}, - {UINT64_2PART_C(0xa3ab6658, 0d5fdaf6), -555, -148}, - {UINT64_2PART_C(0xf3e2f893, dec3f126), -529, -140}, - {UINT64_2PART_C(0xb5b5ada8, aaff80b8), -502, -132}, - {UINT64_2PART_C(0x87625f05, 6c7c4a8b), -475, -124}, - {UINT64_2PART_C(0xc9bcff60, 34c13053), -449, -116}, - {UINT64_2PART_C(0x964e858c, 91ba2655), -422, -108}, - {UINT64_2PART_C(0xdff97724, 70297ebd), -396, -100}, - {UINT64_2PART_C(0xa6dfbd9f, b8e5b88f), -369, -92}, - {UINT64_2PART_C(0xf8a95fcf, 88747d94), -343, -84}, - {UINT64_2PART_C(0xb9447093, 8fa89bcf), -316, -76}, - {UINT64_2PART_C(0x8a08f0f8, bf0f156b), -289, -68}, - {UINT64_2PART_C(0xcdb02555, 653131b6), -263, -60}, - {UINT64_2PART_C(0x993fe2c6, d07b7fac), -236, -52}, - {UINT64_2PART_C(0xe45c10c4, 2a2b3b06), -210, -44}, - {UINT64_2PART_C(0xaa242499, 697392d3), -183, -36}, - {UINT64_2PART_C(0xfd87b5f2, 8300ca0e), -157, -28}, - {UINT64_2PART_C(0xbce50864, 92111aeb), -130, -20}, - {UINT64_2PART_C(0x8cbccc09, 6f5088cc), -103, -12}, - {UINT64_2PART_C(0xd1b71758, e219652c), -77, -4}, - {UINT64_2PART_C(0x9c400000, 00000000), -50, 4}, - {UINT64_2PART_C(0xe8d4a510, 00000000), -24, 12}, - 
{UINT64_2PART_C(0xad78ebc5, ac620000), 3, 20}, - {UINT64_2PART_C(0x813f3978, f8940984), 30, 28}, - {UINT64_2PART_C(0xc097ce7b, c90715b3), 56, 36}, - {UINT64_2PART_C(0x8f7e32ce, 7bea5c70), 83, 44}, - {UINT64_2PART_C(0xd5d238a4, abe98068), 109, 52}, - {UINT64_2PART_C(0x9f4f2726, 179a2245), 136, 60}, - {UINT64_2PART_C(0xed63a231, d4c4fb27), 162, 68}, - {UINT64_2PART_C(0xb0de6538, 8cc8ada8), 189, 76}, - {UINT64_2PART_C(0x83c7088e, 1aab65db), 216, 84}, - {UINT64_2PART_C(0xc45d1df9, 42711d9a), 242, 92}, - {UINT64_2PART_C(0x924d692c, a61be758), 269, 100}, - {UINT64_2PART_C(0xda01ee64, 1a708dea), 295, 108}, - {UINT64_2PART_C(0xa26da399, 9aef774a), 322, 116}, - {UINT64_2PART_C(0xf209787b, b47d6b85), 348, 124}, - {UINT64_2PART_C(0xb454e4a1, 79dd1877), 375, 132}, - {UINT64_2PART_C(0x865b8692, 5b9bc5c2), 402, 140}, - {UINT64_2PART_C(0xc83553c5, c8965d3d), 428, 148}, - {UINT64_2PART_C(0x952ab45c, fa97a0b3), 455, 156}, - {UINT64_2PART_C(0xde469fbd, 99a05fe3), 481, 164}, - {UINT64_2PART_C(0xa59bc234, db398c25), 508, 172}, - {UINT64_2PART_C(0xf6c69a72, a3989f5c), 534, 180}, - {UINT64_2PART_C(0xb7dcbf53, 54e9bece), 561, 188}, - {UINT64_2PART_C(0x88fcf317, f22241e2), 588, 196}, - {UINT64_2PART_C(0xcc20ce9b, d35c78a5), 614, 204}, - {UINT64_2PART_C(0x98165af3, 7b2153df), 641, 212}, - {UINT64_2PART_C(0xe2a0b5dc, 971f303a), 667, 220}, - {UINT64_2PART_C(0xa8d9d153, 5ce3b396), 694, 228}, - {UINT64_2PART_C(0xfb9b7cd9, a4a7443c), 720, 236}, - {UINT64_2PART_C(0xbb764c4c, a7a44410), 747, 244}, - {UINT64_2PART_C(0x8bab8eef, b6409c1a), 774, 252}, - {UINT64_2PART_C(0xd01fef10, a657842c), 800, 260}, - {UINT64_2PART_C(0x9b10a4e5, e9913129), 827, 268}, - {UINT64_2PART_C(0xe7109bfb, a19c0c9d), 853, 276}, - {UINT64_2PART_C(0xac2820d9, 623bf429), 880, 284}, - {UINT64_2PART_C(0x80444b5e, 7aa7cf85), 907, 292}, - {UINT64_2PART_C(0xbf21e440, 03acdd2d), 933, 300}, - {UINT64_2PART_C(0x8e679c2f, 5e44ff8f), 960, 308}, - {UINT64_2PART_C(0xd433179d, 9c8cb841), 986, 316}, - {UINT64_2PART_C(0x9e19db92, b4e31ba9), 1013, 324}, - {UINT64_2PART_C(0xeb96bf6e, badf77d9), 1039, 332}, - {UINT64_2PART_C(0xaf87023b, 9bf0ee6b), 1066, 340}, -}; - -static const int kCachedPowersLength = ARRAY_SIZE(kCachedPowers); -static const int kCachedPowersOffset = 348; // -1 * the first decimal_exponent. -static const double kD_1_LOG2_10 = 0.30102999566398114; // 1 / lg(10) -// Difference between the decimal exponents in the table above. -const int PowersOfTenCache::kDecimalExponentDistance = 8; -const int PowersOfTenCache::kMinDecimalExponent = -348; -const int PowersOfTenCache::kMaxDecimalExponent = 340; - -void PowersOfTenCache::GetCachedPowerForBinaryExponentRange( - int min_exponent, - int max_exponent, - DiyFp* power, - int* decimal_exponent) { - int kQ = DiyFp::kSignificandSize; - double k = ceil((min_exponent + kQ - 1) * kD_1_LOG2_10); - int foo = kCachedPowersOffset; - int index = - (foo + static_cast(k) - 1) / kDecimalExponentDistance + 1; - ASSERT(0 <= index && index < kCachedPowersLength); - CachedPower cached_power = kCachedPowers[index]; - ASSERT(min_exponent <= cached_power.binary_exponent); - (void) max_exponent; // Mark variable as used. 
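The index arithmetic just above only has to invert the table layout: entry i holds decimal exponent -348 + 8 * i. A self-contained check of that mapping (illustration only; kOffset and kDistance mirror kCachedPowersOffset and kDecimalExponentDistance):

    #include <cassert>

    int main() {
        const int kOffset = 348;  // -1 * the first decimal exponent in the table
        const int kDistance = 8;  // spacing between cached decimal exponents
        // For an estimated decimal exponent k, pick the next table entry at or
        // above it, as GetCachedPowerForBinaryExponentRange does.
        int k = -4;
        int index = (kOffset + k - 1) / kDistance + 1;
        assert(index == 43 && -348 + kDistance * index == -4);
        return 0;
    }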
- ASSERT(cached_power.binary_exponent <= max_exponent); - *decimal_exponent = cached_power.decimal_exponent; - *power = DiyFp(cached_power.significand, cached_power.binary_exponent); -} - - -void PowersOfTenCache::GetCachedPowerForDecimalExponent(int requested_exponent, - DiyFp* power, - int* found_exponent) { - ASSERT(kMinDecimalExponent <= requested_exponent); - ASSERT(requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance); - int index = - (requested_exponent + kCachedPowersOffset) / kDecimalExponentDistance; - CachedPower cached_power = kCachedPowers[index]; - *power = DiyFp(cached_power.significand, cached_power.binary_exponent); - *found_exponent = cached_power.decimal_exponent; - ASSERT(*found_exponent <= requested_exponent); - ASSERT(requested_exponent < *found_exponent + kDecimalExponentDistance); -} - -} // namespace double_conversion diff --git a/base/poco/Foundation/src/cached-powers.h b/base/poco/Foundation/src/cached-powers.h deleted file mode 100644 index a65092d6cad..00000000000 --- a/base/poco/Foundation/src/cached-powers.h +++ /dev/null @@ -1,60 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_CACHED_POWERS_H_ -#define DOUBLE_CONVERSION_CACHED_POWERS_H_ - -#include "diy-fp.h" - -namespace double_conversion -{ - -class PowersOfTenCache -{ -public: - // Not all powers of ten are cached. The decimal exponent of two neighboring - // cached numbers will differ by kDecimalExponentDistance. - static const int kDecimalExponentDistance; - - static const int kMinDecimalExponent; - static const int kMaxDecimalExponent; - - // Returns a cached power-of-ten with a binary exponent in the range - // [min_exponent; max_exponent] (boundaries included). - static void GetCachedPowerForBinaryExponentRange(int min_exponent, int max_exponent, DiyFp * power, int * decimal_exponent); - - // Returns a cached power of ten x ~= 10^k such that - // k <= decimal_exponent < k + kCachedPowersDecimalDistance. 
- // The given decimal_exponent must satisfy - // kMinDecimalExponent <= requested_exponent, and - // requested_exponent < kMaxDecimalExponent + kDecimalExponentDistance. - static void GetCachedPowerForDecimalExponent(int requested_exponent, DiyFp * power, int * found_exponent); -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_CACHED_POWERS_H_ diff --git a/base/poco/Foundation/src/compress.c b/base/poco/Foundation/src/compress.c deleted file mode 100644 index e2db404abf8..00000000000 --- a/base/poco/Foundation/src/compress.c +++ /dev/null @@ -1,86 +0,0 @@ -/* compress.c -- compress a memory buffer - * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* @(#) $Id$ */ - -#define ZLIB_INTERNAL -#include "zlib.h" - -/* =========================================================================== - Compresses the source buffer into the destination buffer. The level - parameter has the same meaning as in deflateInit. sourceLen is the byte - length of the source buffer. Upon entry, destLen is the total size of the - destination buffer, which must be at least 0.1% larger than sourceLen plus - 12 bytes. Upon exit, destLen is the actual size of the compressed buffer. - - compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough - memory, Z_BUF_ERROR if there was not enough room in the output buffer, - Z_STREAM_ERROR if the level parameter is invalid. -*/ -int ZEXPORT compress2 (dest, destLen, source, sourceLen, level) - Bytef *dest; - uLongf *destLen; - const Bytef *source; - uLong sourceLen; - int level; -{ - z_stream stream; - int err; - const uInt max = (uInt)-1; - uLong left; - - left = *destLen; - *destLen = 0; - - stream.zalloc = (alloc_func)0; - stream.zfree = (free_func)0; - stream.opaque = (voidpf)0; - - err = deflateInit(&stream, level); - if (err != Z_OK) return err; - - stream.next_out = dest; - stream.avail_out = 0; - stream.next_in = (z_const Bytef *)source; - stream.avail_in = 0; - - do { - if (stream.avail_out == 0) { - stream.avail_out = left > (uLong)max ? max : (uInt)left; - left -= stream.avail_out; - } - if (stream.avail_in == 0) { - stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen; - sourceLen -= stream.avail_in; - } - err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH); - } while (err == Z_OK); - - *destLen = stream.total_out; - deflateEnd(&stream); - return err == Z_STREAM_END ? Z_OK : err; -} - -/* =========================================================================== - */ -int ZEXPORT compress (dest, destLen, source, sourceLen) - Bytef *dest; - uLongf *destLen; - const Bytef *source; - uLong sourceLen; -{ - return compress2(dest, destLen, source, sourceLen, Z_DEFAULT_COMPRESSION); -} - -/* =========================================================================== - If the default memLevel or windowBits for deflateInit() is changed, then - this function needs to be updated. 
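For context, the usual calling pattern for the one-shot zlib helpers deleted here pairs compressBound() (defined just below) with compress2(), so the destination buffer can never be too small. A minimal usage sketch against the public zlib API:

    #include <cassert>
    #include <vector>
    #include <zlib.h>

    int main() {
        const unsigned char src[] = "hello hello hello hello";
        uLong srcLen = static_cast<uLong>(sizeof(src));
        uLongf dstLen = compressBound(srcLen);  // worst-case compressed size
        std::vector<Bytef> dst(dstLen);
        int err = compress2(dst.data(), &dstLen, src, srcLen, Z_DEFAULT_COMPRESSION);
        assert(err == Z_OK);  // on success dstLen holds the actual compressed size
        return 0;
    }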
- */ -uLong ZEXPORT compressBound (sourceLen) - uLong sourceLen; -{ - return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + - (sourceLen >> 25) + 13; -} diff --git a/base/poco/Foundation/src/crc32.c b/base/poco/Foundation/src/crc32.c deleted file mode 100644 index a0fe210ab1b..00000000000 --- a/base/poco/Foundation/src/crc32.c +++ /dev/null @@ -1,444 +0,0 @@ -/* crc32.c -- compute the CRC-32 of a data stream - * Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - * - * Thanks to Rodney Brown for his contribution of faster - * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing - * tables for updating the shift register in one step with three exclusive-ors - * instead of four steps with four exclusive-ors. This results in about a - * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. - */ - -/* @(#) $Id$ */ - -/* - Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore - protection on the static variables used to control the first-use generation - of the crc tables. Therefore, if you #define DYNAMIC_CRC_TABLE, you should - first call get_crc_table() to initialize the tables before allowing more than - one thread to use crc32(). - - DYNAMIC_CRC_TABLE and MAKECRCH can be #defined to write out crc32.h. - */ - -#ifdef MAKECRCH -# include -# ifndef DYNAMIC_CRC_TABLE -# define DYNAMIC_CRC_TABLE -# endif /* !DYNAMIC_CRC_TABLE */ -#endif /* MAKECRCH */ - -#include "zutil.h" /* for STDC and FAR definitions */ - -#define local static - -/* Definitions for doing the crc four data bytes at a time. */ -#if !defined(NOBYFOUR) && defined(Z_U4) -# define BYFOUR -#endif -#ifdef BYFOUR - local unsigned long crc32_little OF((unsigned long, - const unsigned char FAR *, z_size_t)); - local unsigned long crc32_big OF((unsigned long, - const unsigned char FAR *, z_size_t)); -# define TBLS 8 -#else -# define TBLS 1 -#endif /* BYFOUR */ - -/* Local functions for crc concatenation */ -local unsigned long gf2_matrix_times OF((unsigned long *mat, - unsigned long vec)); -local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat)); -local uLong crc32_combine_ OF((uLong crc1, uLong crc2, z_off64_t len2)); - - -#ifdef DYNAMIC_CRC_TABLE - -local volatile int crc_table_empty = 1; -local z_crc_t FAR crc_table[TBLS][256]; -local void make_crc_table OF((void)); -#ifdef MAKECRCH - local void write_table OF((FILE *, const z_crc_t FAR *)); -#endif /* MAKECRCH */ -/* - Generate tables for a byte-wise 32-bit CRC calculation on the polynomial: - x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1. - - Polynomials over GF(2) are represented in binary, one bit per coefficient, - with the lowest powers in the most significant bit. Then adding polynomials - is just exclusive-or, and multiplying a polynomial by x is a right shift by - one. If we call the above polynomial p, and represent a byte as the - polynomial q, also with the lowest power in the most significant bit (so the - byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p, - where a mod b means the remainder after dividing a by b. - - This calculation is done using the shift-register method of multiplying and - taking the remainder. 
The register is initialized to zero, and for each - incoming bit, x^32 is added mod p to the register if the bit is a one (where - x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by - x (which is shifting right by one and adding x^32 mod p if the bit shifted - out is a one). We start with the highest power (least significant bit) of - q and repeat for all eight bits of q. - - The first table is simply the CRC of all possible eight bit values. This is - all the information needed to generate CRCs on data a byte at a time for all - combinations of CRC register values and incoming bytes. The remaining tables - allow for word-at-a-time CRC calculation for both big-endian and little- - endian machines, where a word is four bytes. -*/ -local void make_crc_table() -{ - z_crc_t c; - int n, k; - z_crc_t poly; /* polynomial exclusive-or pattern */ - /* terms of polynomial defining this crc (except x^32): */ - static volatile int first = 1; /* flag to limit concurrent making */ - static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26}; - - /* See if another task is already doing this (not thread-safe, but better - than nothing -- significantly reduces duration of vulnerability in - case the advice about DYNAMIC_CRC_TABLE is ignored) */ - if (first) { - first = 0; - - /* make exclusive-or pattern from polynomial (0xedb88320UL) */ - poly = 0; - for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) - poly |= (z_crc_t)1 << (31 - p[n]); - - /* generate a crc for every 8-bit value */ - for (n = 0; n < 256; n++) { - c = (z_crc_t)n; - for (k = 0; k < 8; k++) - c = c & 1 ? poly ^ (c >> 1) : c >> 1; - crc_table[0][n] = c; - } - -#ifdef BYFOUR - /* generate crc for each value followed by one, two, and three zeros, - and then the byte reversal of those as well as the first table */ - for (n = 0; n < 256; n++) { - c = crc_table[0][n]; - crc_table[4][n] = ZSWAP32(c); - for (k = 1; k < 4; k++) { - c = crc_table[0][c & 0xff] ^ (c >> 8); - crc_table[k][n] = c; - crc_table[k + 4][n] = ZSWAP32(c); - } - } -#endif /* BYFOUR */ - - crc_table_empty = 0; - } - else { /* not first */ - /* wait for the other guy to finish (not efficient, but rare) */ - while (crc_table_empty) - ; - } - -#ifdef MAKECRCH - /* write out CRC tables to crc32.h */ - { - FILE *out; - - out = fopen("crc32.h", "w"); - if (out == NULL) return; - fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n"); - fprintf(out, " * Generated automatically by crc32.c\n */\n\n"); - fprintf(out, "local const z_crc_t FAR "); - fprintf(out, "crc_table[TBLS][256] =\n{\n {\n"); - write_table(out, crc_table[0]); -# ifdef BYFOUR - fprintf(out, "#ifdef BYFOUR\n"); - for (k = 1; k < 8; k++) { - fprintf(out, " },\n {\n"); - write_table(out, crc_table[k]); - } - fprintf(out, "#endif\n"); -# endif /* BYFOUR */ - fprintf(out, " }\n};\n"); - fclose(out); - } -#endif /* MAKECRCH */ -} - -#ifdef MAKECRCH -local void write_table(out, table) - FILE *out; - const z_crc_t FAR *table; -{ - int n; - - for (n = 0; n < 256; n++) - fprintf(out, "%s0x%08lxUL%s", n % 5 ? "" : " ", - (unsigned long)(table[n]), - n == 255 ? "\n" : (n % 5 == 4 ? ",\n" : ", ")); -} -#endif /* MAKECRCH */ - -#else /* !DYNAMIC_CRC_TABLE */ -/* ======================================================================== - * Tables of CRC-32s of all single-byte values, made by make_crc_table(). 
- */ -#include "crc32.h" -#endif /* DYNAMIC_CRC_TABLE */ - -/* ========================================================================= - * This function can be used by asm versions of crc32() - */ -const z_crc_t FAR * ZEXPORT get_crc_table() -{ -#ifdef DYNAMIC_CRC_TABLE - if (crc_table_empty) - make_crc_table(); -#endif /* DYNAMIC_CRC_TABLE */ - return (const z_crc_t FAR *)crc_table; -} - -/* ========================================================================= */ -#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8) -#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1 - -/* ========================================================================= */ -unsigned long ZEXPORT crc32_z(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - z_size_t len; -{ - if (buf == Z_NULL) return 0UL; - -#ifdef DYNAMIC_CRC_TABLE - if (crc_table_empty) - make_crc_table(); -#endif /* DYNAMIC_CRC_TABLE */ - -#ifdef BYFOUR - if (sizeof(void *) == sizeof(ptrdiff_t)) { - z_crc_t endian; - - endian = 1; - if (*((unsigned char *)(&endian))) - return crc32_little(crc, buf, len); - else - return crc32_big(crc, buf, len); - } -#endif /* BYFOUR */ - crc = crc ^ 0xffffffffUL; - while (len >= 8) { - DO8; - len -= 8; - } - if (len) do { - DO1; - } while (--len); - return crc ^ 0xffffffffUL; -} - -/* ========================================================================= */ -unsigned long ZEXPORT crc32(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - uInt len; -{ - return crc32_z(crc, buf, len); -} - -#ifdef BYFOUR - -/* - This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit - integer pointer type. This violates the strict aliasing rule, where a - compiler can assume, for optimization purposes, that two pointers to - fundamentally different types won't ever point to the same memory. This can - manifest as a problem only if one of the pointers is written to. This code - only reads from those pointers. So long as this code remains isolated in - this compilation unit, there won't be a problem. For this reason, this code - should not be copied and pasted into a compilation unit in which other code - writes to the buffer that is passed to these routines. 
- */ - -/* ========================================================================= */ -#define DOLIT4 c ^= *buf4++; \ - c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ - crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] -#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 - -/* ========================================================================= */ -local unsigned long crc32_little(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - z_size_t len; -{ - register z_crc_t c; - register const z_crc_t FAR *buf4; - - c = (z_crc_t)crc; - c = ~c; - while (len && ((ptrdiff_t)buf & 3)) { - c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); - len--; - } - - buf4 = (const z_crc_t FAR *)(const void FAR *)buf; - while (len >= 32) { - DOLIT32; - len -= 32; - } - while (len >= 4) { - DOLIT4; - len -= 4; - } - buf = (const unsigned char FAR *)buf4; - - if (len) do { - c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); - } while (--len); - c = ~c; - return (unsigned long)c; -} - -/* ========================================================================= */ -#define DOBIG4 c ^= *buf4++; \ - c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \ - crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24] -#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4 - -/* ========================================================================= */ -local unsigned long crc32_big(crc, buf, len) - unsigned long crc; - const unsigned char FAR *buf; - z_size_t len; -{ - register z_crc_t c; - register const z_crc_t FAR *buf4; - - c = ZSWAP32((z_crc_t)crc); - c = ~c; - while (len && ((ptrdiff_t)buf & 3)) { - c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); - len--; - } - - buf4 = (const z_crc_t FAR *)(const void FAR *)buf; - while (len >= 32) { - DOBIG32; - len -= 32; - } - while (len >= 4) { - DOBIG4; - len -= 4; - } - buf = (const unsigned char FAR *)buf4; - - if (len) do { - c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8); - } while (--len); - c = ~c; - return (unsigned long)(ZSWAP32(c)); -} - -#endif /* BYFOUR */ - -#define GF2_DIM 32 /* dimension of GF(2) vectors (length of CRC) */ - -/* ========================================================================= */ -local unsigned long gf2_matrix_times(mat, vec) - unsigned long *mat; - unsigned long vec; -{ - unsigned long sum; - - sum = 0; - while (vec) { - if (vec & 1) - sum ^= *mat; - vec >>= 1; - mat++; - } - return sum; -} - -/* ========================================================================= */ -local void gf2_matrix_square(square, mat) - unsigned long *square; - unsigned long *mat; -{ - int n; - - for (n = 0; n < GF2_DIM; n++) - square[n] = gf2_matrix_times(mat, mat[n]); -} - -/* ========================================================================= */ -local uLong crc32_combine_(crc1, crc2, len2) - uLong crc1; - uLong crc2; - z_off64_t len2; -{ - int n; - unsigned long row; - unsigned long even[GF2_DIM]; /* even-power-of-two zeros operator */ - unsigned long odd[GF2_DIM]; /* odd-power-of-two zeros operator */ - - /* degenerate case (also disallow negative lengths) */ - if (len2 <= 0) - return crc1; - - /* put operator for one zero bit in odd */ - odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ - row = 1; - for (n = 1; n < GF2_DIM; n++) { - odd[n] = row; - row <<= 1; - } - - /* put operator for two zero bits in even */ - gf2_matrix_square(even, odd); - - /* put operator for four zero bits in odd */ - gf2_matrix_square(odd, even); - - /* apply len2 zeros 
to crc1 (first square will put the operator for one - zero byte, eight zero bits, in even) */ - do { - /* apply zeros operator for this bit of len2 */ - gf2_matrix_square(even, odd); - if (len2 & 1) - crc1 = gf2_matrix_times(even, crc1); - len2 >>= 1; - - /* if no more bits set, then done */ - if (len2 == 0) - break; - - /* another iteration of the loop with odd and even swapped */ - gf2_matrix_square(odd, even); - if (len2 & 1) - crc1 = gf2_matrix_times(odd, crc1); - len2 >>= 1; - - /* if no more bits set, then done */ - } while (len2 != 0); - - /* return combined crc */ - crc1 ^= crc2; - return crc1; -} - -/* ========================================================================= */ -uLong ZEXPORT crc32_combine(crc1, crc2, len2) - uLong crc1; - uLong crc2; - z_off_t len2; -{ - return crc32_combine_(crc1, crc2, len2); -} - -uLong ZEXPORT crc32_combine64(crc1, crc2, len2) - uLong crc1; - uLong crc2; - z_off64_t len2; -{ - return crc32_combine_(crc1, crc2, len2); -} diff --git a/base/poco/Foundation/src/crc32.h b/base/poco/Foundation/src/crc32.h deleted file mode 100644 index 3e00b277e77..00000000000 --- a/base/poco/Foundation/src/crc32.h +++ /dev/null @@ -1,241 +0,0 @@ -/* crc32.h -- tables for rapid CRC calculation - * Generated automatically by crc32.c - */ - -local const z_crc_t FAR crc_table[TBLS][256] - = {{0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, - 0x79dcb8a4UL, 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, - 0xf3b97148UL, 0x84be41deUL, 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, 0x646ba8c0UL, 0xfd62f97aUL, - 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, 0xa2677172UL, - 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, - 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, - 0xcfba9599UL, 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, - 0xb6662d3dUL, 0x76dc4190UL, 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, 0x9fbfe4a5UL, 0xe8b8d433UL, - 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, 0x6b6b51f4UL, - 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, - 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, - 0xd4bb30e2UL, 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, - 0x44042d73UL, 0x33031de5UL, 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, 0xc90c2086UL, 0x5768b525UL, - 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, 0x2eb40d81UL, - 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, - 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, - 0xf00f9344UL, 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, - 0x89d32be0UL, 0x10da7a5aUL, 0x67dd4accUL, 
0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, 0xd6d6a3e8UL, 0xa1d1937eUL, - 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, 0x36034af6UL, - 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, - 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, - 0xb5d0cf31UL, 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, - 0x72076785UL, 0x05005713UL, 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, 0xe5d5be0dUL, 0x7cdcefb7UL, - 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, 0x18b74777UL, - 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, - 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, - 0x40df0b66UL, 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, - 0x24b4a3a6UL, 0xbad03605UL, 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, 0x5d681b02UL, 0x2a6f2b94UL, - 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, 0x2d02ef8dUL -#ifdef BYFOUR - }, - {0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, - 0xd1c2bb49UL, 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, - 0x78f470d3UL, 0x61ef4192UL, 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, 0x9b00a918UL, 0xb02dfadbUL, - 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, 0xbea97761UL, - 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, - 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, - 0x891c9175UL, 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, - 0x58de2a3cUL, 0xf0794f05UL, 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, 0xa623e883UL, 0xbf38d9c2UL, - 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, 0xbabb5d54UL, - 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, - 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, - 0x4ed03864UL, 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, - 0xc94824abUL, 0xd05315eaUL, 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, 0x04122a35UL, 0x4b53bcf2UL, - 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, 0x9a9107bbUL, - 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, - 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, - 0x71418a1aUL, 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 
0x152d4f1eUL, 0x0c367e5fUL, 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, - 0xa0833153UL, 0x8bae6290UL, 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, 0xae07bce9UL, 0xb71c8da8UL, - 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, 0x54e85463UL, - 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, - 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, - 0x516bd0f5UL, 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, - 0x9da070c8UL, 0x84bb4189UL, 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, 0x7e54a903UL, 0x5579fac0UL, - 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, 0xce7953d8UL, - 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, - 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, - 0xa4911b66UL, 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, - 0x3f91b27eUL, 0x70d024b9UL, 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, 0xee530937UL, 0xf7483876UL, - 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, 0x9324fd72UL}, - {0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, - 0x0fd13b8fUL, 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, - 0x1fa2771eUL, 0x1e601d29UL, 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, 0x13f798ffUL, 0x11b126a6UL, - 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, 0x3a0bf8b9UL, - 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, - 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, - 0x20e69922UL, 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, - 0x2f37a2adUL, 0x709a8dc0UL, 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, 0x7417f172UL, 0x75d59b45UL, - 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, 0x6cbc2eb0UL, - 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, - 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, - 0x4a917579UL, 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, - 0x41cd3244UL, 0x400f5873UL, 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, 0x56b7d609UL, 0x53f8c08cUL, - 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, 0x5c29fb03UL, - 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, - 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, 0xe82fe2e4UL, 
0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, - 0xfd13b8f0UL, 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, - 0xf2c2837fUL, 0xf0843d26UL, 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, 0xd9785d60UL, 0xd8ba3757UL, - 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, 0xd4efd8b6UL, - 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, - 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, - 0xcd866d43UL, 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, - 0x9522eaf2UL, 0x94e080c5UL, 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, 0x99770513UL, 0x9b31bb4aUL, - 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, 0x88c623b5UL, - 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, - 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, - 0xa4755576UL, 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, - 0xb782cd89UL, 0xb2cddb0cUL, 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, 0xb853f606UL, 0xb9919c31UL, - 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, 0xbe9834edUL}, - {0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, - 0x7d084f8aUL, 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, - 0xfa109f14UL, 0x42acf871UL, 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, 0x2d111815UL, 0x3fa4b7fbUL, - 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, 0xb28700d0UL, - 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, - 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, - 0xd540a77dUL, 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, - 0xa848e8f7UL, 0x9b14583dUL, 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, 0xbe7f07e1UL, 0x06c36084UL, - 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, 0xcb0d0fa2UL, - 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, - 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, - 0x299358edUL, 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, 0x462eb889UL, 0x549b1767UL, 0xec277002UL, - 0x71f048bbUL, 0xc94c2fdeUL, 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, 0x798a0f72UL, 0xe45d37cbUL, - 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, 0x99557841UL, - 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 
0xc832e9e7UL, - 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, - 0xbd40e1a4UL, 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, - 0xc048ae2eUL, 0xd2fd01c0UL, 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, 0x4d6b1905UL, 0xf5d77e60UL, - 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, 0x22d6f961UL, - 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, - 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, - 0xef189647UL, 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, - 0x5326b1daUL, 0xeb9ad6bfUL, 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, 0x842736dbUL, 0x96929935UL, - 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, 0xbb838120UL, - 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, - 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, - 0xb9c2a15cUL, 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, - 0x94d3b949UL, 0x090481f0UL, 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, 0xe9dbf6c3UL, 0x516791a6UL, - 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, 0xde0506f1UL}, - {0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, - 0xa4b8dc79UL, 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, - 0x4871b9f3UL, 0xde41be84UL, 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, 0xc0a86b64UL, 0x7af962fdUL, - 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, 0x727167a2UL, - 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, - 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, - 0x9995bacfUL, 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, 0x877c6f2fUL, 0x114c6858UL, 0xab1d61c1UL, - 0x3d2d66b6UL, 0x9041dc76UL, 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, 0xa5e4bf9fUL, 0x33d4b8e8UL, - 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, 0xf4516b6bUL, - 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, - 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, - 0xe230bbd4UL, 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, - 0x732d0444UL, 0xe51d0333UL, 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, 0x86200cc9UL, 0x25b56857UL, - 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, 0x810db42eUL, - 0x3b5cbdb7UL, 
0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, - 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, - 0x44930ff0UL, 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, - 0xe02bd389UL, 0x5a7ada10UL, 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, 0xe8a3d6d6UL, 0x7e93d1a1UL, - 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, 0xf64a0336UL, - 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, - 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, - 0x31cfd0b5UL, 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, - 0x85670772UL, 0x13570005UL, 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, 0x0dbed5e5UL, 0xb7efdc7cUL, - 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, 0x7747b718UL, - 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, - 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, - 0x660bdf40UL, 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, - 0xa6a3b424UL, 0x0536d0baUL, 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, 0x021b685dUL, 0x942b6f2aUL, - 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, 0x8def022dUL}, - {0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, - 0x49bbc2d1UL, 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, - 0xd370f478UL, 0x9241ef61UL, 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, 0x18a9009bUL, 0xdbfa2db0UL, - 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, 0x6177a9beUL, - 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, - 0xef5a2a20UL, 0x2c09070bUL, 0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, - 0x75911c89UL, 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, - 0x3c2ade58UL, 0x054f79f0UL, 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, 0x83e823a6UL, 0xc2d938bfUL, - 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, 0x545dbbbaUL, - 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, - 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, - 0x6438d04eUL, 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, - 0xab2448c9UL, 0xea1553d0UL, 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, 0x352a1204UL, 0xf2bc534bUL, - 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, 
0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, 0xbb07919aUL, - 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, - 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, - 0x1a8a4171UL, 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, - 0x533183a0UL, 0x9062ae8bUL, 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, 0xe9bc07aeUL, 0xa88d1cb7UL, - 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, 0x6354e854UL, - 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, - 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, - 0xf5d06b51UL, 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, - 0xc870a09dUL, 0x8941bb84UL, 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, 0x03a9547eUL, 0xc0fa7955UL, - 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, 0xd85379ceUL, - 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, - 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, - 0x661b91a4UL, 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, - 0x7eb2913fUL, 0xb924d070UL, 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, 0x370953eeUL, 0x763848f7UL, - 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, 0x72fd2493UL}, - {0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, - 0x8f3bd10fUL, 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, - 0x1e77a21fUL, 0x291d601eUL, 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, 0xff98f713UL, 0xa626b111UL, - 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 0xd72c8f39UL, 0x8e92c93bUL, 0xb9f80b3aUL, - 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, - 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, - 0x2299e620UL, 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, - 0xada2372fUL, 0xc08d9a70UL, 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, 0x72f11774UL, 0x459bd575UL, - 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, 0xb02ebc6cUL, - 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, - 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, - 0x7975914aUL, 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, - 0x4432cd41UL, 0x73580f40UL, 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 
0x67023355UL, 0x3ebc7557UL, 0x09d6b756UL, 0x8cc0f853UL, - 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, 0x03fb295cUL, - 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, - 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, - 0xf0b813fdUL, 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, - 0x7f83c2f2UL, 0x263d84f0UL, 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, 0x605d78d9UL, 0x5737bad8UL, - 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, 0xb6d8efd4UL, - 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, - 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, - 0x436d86cdUL, 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, - 0xf2ea2295UL, 0xc580e094UL, 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, 0x13057799UL, 0x4abb319bUL, - 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, 0xb523c688UL, - 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, - 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, - 0x765575a4UL, 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, - 0x89cd82b7UL, 0x0cdbcdb2UL, 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, 0x06f653b8UL, 0x319c91b9UL, - 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, 0xed3498beUL}, - {0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, - 0x8a4f087dUL, 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, - 0x149f10faUL, 0x71f8ac42UL, 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, 0x1518112dUL, 0xfbb7a43fUL, - 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, 0xd00087b2UL, - 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, - 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, - 0x7da740d5UL, 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, - 0xf7e848a8UL, 0x3d58149bUL, 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, 0xe1077fbeUL, 0x8460c306UL, - 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, 0xa20f0dcbUL, - 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, - 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, - 0xed589329UL, 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, 0x89b82e46UL, 
0x67179b54UL, 0x027027ecUL, - 0xbb48f071UL, 0xde2f4cc9UL, 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, 0x720f8a79UL, 0xcb375de4UL, - 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, 0x41785599UL, - 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, - 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, - 0xa4e140bdUL, 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, - 0x2eae48c0UL, 0xc001fdd2UL, 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, 0x05196b4dUL, 0x607ed7f5UL, - 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, 0x61f9d622UL, - 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, - 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, - 0x479618efUL, 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, - 0xdab12653UL, 0xbfd69aebUL, 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, 0xdb362784UL, 0x35999296UL, - 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, 0x208183bbUL, - 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, - 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, - 0x5ca1c2b9UL, 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, - 0x49b9d394UL, 0xf0810409UL, 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, 0xc3f6dbe9UL, 0xa6916751UL, - 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, 0xf10605deUL -#endif - }}; diff --git a/base/poco/Foundation/src/deflate.c b/base/poco/Foundation/src/deflate.c deleted file mode 100644 index 0a80edd92d8..00000000000 --- a/base/poco/Foundation/src/deflate.c +++ /dev/null @@ -1,2163 +0,0 @@ -/* deflate.c -- compress data using the deflation algorithm - * Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* - * ALGORITHM - * - * The "deflation" process depends on being able to identify portions - * of the input text which are identical to earlier input (within a - * sliding window trailing behind the input currently being processed). - * - * The most straightforward technique turns out to be the fastest for - * most input files: try all possible matches and select the longest. - * The key feature of this algorithm is that insertions into the string - * dictionary are very simple and thus fast, and deletions are avoided - * completely. Insertions are performed at each input character, whereas - * string matches are performed only when the previous match ends. So it - * is preferable to spend more time in matches to allow very fast string - * insertions and avoid deletions. The matching algorithm for small - * strings is inspired from that of Rabin & Karp. A brute force approach - * is used to find longer strings when a small match has been found. 
- * A similar algorithm is used in comic (by Jan-Mark Wams) and freeze - * (by Leonid Broukhis). - * A previous version of this file used a more sophisticated algorithm - * (by Fiala and Greene) which is guaranteed to run in linear amortized - * time, but has a larger average cost, uses more memory and is patented. - * However the F&G algorithm may be faster for some highly redundant - * files if the parameter max_chain_length (described below) is too large. - * - * ACKNOWLEDGEMENTS - * - * The idea of lazy evaluation of matches is due to Jan-Mark Wams, and - * I found it in 'freeze' written by Leonid Broukhis. - * Thanks to many people for bug reports and testing. - * - * REFERENCES - * - * Deutsch, L.P.,"DEFLATE Compressed Data Format Specification". - * Available in http://tools.ietf.org/html/rfc1951 - * - * A description of the Rabin and Karp algorithm is given in the book - * "Algorithms" by R. Sedgewick, Addison-Wesley, p252. - * - * Fiala,E.R., and Greene,D.H. - * Data Compression with Finite Windows, Comm.ACM, 32,4 (1989) 490-595 - * - */ - -/* @(#) $Id$ */ - -#include "deflate.h" - -const char deflate_copyright[] = - " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler "; -/* - If you use the zlib library in a product, an acknowledgment is welcome - in the documentation of your product. If for some reason you cannot - include such an acknowledgment, I would appreciate that you keep this - copyright string in the executable of your product. - */ - -/* =========================================================================== - * Function prototypes. - */ -typedef enum { - need_more, /* block not completed, need more input or more output */ - block_done, /* block flush performed */ - finish_started, /* finish started, need only more output at next deflate */ - finish_done /* finish done, accept no more input or output */ -} block_state; - -typedef block_state (*compress_func) OF((deflate_state *s, int flush)); -/* Compression function. Returns the block state after the call. 
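The compress_func typedef above is the hook that lets deflate() pick a strategy per compression level out of configuration_table (defined below). A toy illustration of that dispatch pattern, with ad-hoc names and trivial stand-in functions:

#include <stdio.h>

typedef int (*compress_fn)(int flush);

static int store_only(int flush) { (void)flush; return 0; }
static int fast_match(int flush) { (void)flush; return 1; }
static int lazy_match(int flush) { (void)flush; return 2; }

static const struct { compress_fn func; } toy_table[] = {
    {store_only},   /* level 0: no matching at all  */
    {fast_match},   /* low levels: greedy matching  */
    {lazy_match},   /* high levels: lazy evaluation */
};

int main(void)
{
    int level = 2;
    printf("block state: %d\n", toy_table[level].func(0)); /* dispatch as deflate() does */
    return 0;
}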
*/ - -local int deflateStateCheck OF((z_streamp strm)); -local void slide_hash OF((deflate_state *s)); -local void fill_window OF((deflate_state *s)); -local block_state deflate_stored OF((deflate_state *s, int flush)); -local block_state deflate_fast OF((deflate_state *s, int flush)); -#ifndef FASTEST -local block_state deflate_slow OF((deflate_state *s, int flush)); -#endif -local block_state deflate_rle OF((deflate_state *s, int flush)); -local block_state deflate_huff OF((deflate_state *s, int flush)); -local void lm_init OF((deflate_state *s)); -local void putShortMSB OF((deflate_state *s, uInt b)); -local void flush_pending OF((z_streamp strm)); -local unsigned read_buf OF((z_streamp strm, Bytef *buf, unsigned size)); -#ifdef ASMV -# pragma message("Assembler code may have bugs -- use at your own risk") - void match_init OF((void)); /* asm code initialization */ - uInt longest_match OF((deflate_state *s, IPos cur_match)); -#else -local uInt longest_match OF((deflate_state *s, IPos cur_match)); -#endif - -#ifdef ZLIB_DEBUG -local void check_match OF((deflate_state *s, IPos start, IPos match, - int length)); -#endif - -/* =========================================================================== - * Local data - */ - -#define NIL 0 -/* Tail of hash chains */ - -#ifndef TOO_FAR -# define TOO_FAR 4096 -#endif -/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ - -/* Values for max_lazy_match, good_match and max_chain_length, depending on - * the desired pack level (0..9). The values given below have been tuned to - * exclude worst case performance for pathological files. Better values may be - * found for specific files. - */ -typedef struct config_s { - ush good_length; /* reduce lazy search above this match length */ - ush max_lazy; /* do not perform lazy search above this match length */ - ush nice_length; /* quit search above this match length */ - ush max_chain; - compress_func func; -} config; - -#ifdef FASTEST -local const config configuration_table[2] = { -/* good lazy nice chain */ -/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ -/* 1 */ {4, 4, 8, 4, deflate_fast}}; /* max speed, no lazy matches */ -#else -local const config configuration_table[10] = { -/* good lazy nice chain */ -/* 0 */ {0, 0, 0, 0, deflate_stored}, /* store only */ -/* 1 */ {4, 4, 8, 4, deflate_fast}, /* max speed, no lazy matches */ -/* 2 */ {4, 5, 16, 8, deflate_fast}, -/* 3 */ {4, 6, 32, 32, deflate_fast}, - -/* 4 */ {4, 4, 16, 16, deflate_slow}, /* lazy matches */ -/* 5 */ {8, 16, 32, 32, deflate_slow}, -/* 6 */ {8, 16, 128, 128, deflate_slow}, -/* 7 */ {8, 32, 128, 256, deflate_slow}, -/* 8 */ {32, 128, 258, 1024, deflate_slow}, -/* 9 */ {32, 258, 258, 4096, deflate_slow}}; /* max compression */ -#endif - -/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 - * For deflate_fast() (levels <= 3) good is ignored and lazy has a different - * meaning. - */ - -/* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */ -#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0)) - -/* =========================================================================== - * Update a hash value with the given input byte - * IN assertion: all calls to UPDATE_HASH are made with consecutive input - * characters, so that a running hash key can be computed from the previous - * key instead of complete recalculation each time. 
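The UPDATE_HASH macro defined just below realizes that running key. A standalone illustration, assuming zlib's defaults of MIN_MATCH == 3 and hash_bits == 15 (so hash_shift == 5, and after three updates the oldest byte has been shifted out of the mask); all names here are ad hoc:

#include <stdio.h>

#define HASH_BITS  15
#define HASH_SHIFT ((HASH_BITS + 3 - 1) / 3)      /* == 5 for MIN_MATCH 3 */
#define HASH_MASK  ((1u << HASH_BITS) - 1)

static unsigned rolling_hash(unsigned h, unsigned char c)
{
    return ((h << HASH_SHIFT) ^ c) & HASH_MASK;
}

int main(void)
{
    const unsigned char buf[] = "abcd";
    unsigned h = 0;
    /* seed with the first MIN_MATCH-1 bytes, then one update per new byte */
    for (int i = 0; i < 4; i++) {
        h = rolling_hash(h, buf[i]);
        printf("after '%c': 0x%04x\n", buf[i], h);
    }
    return 0;
}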
- */ -#define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask) - - -/* =========================================================================== - * Insert string str in the dictionary and set match_head to the previous head - * of the hash chain (the most recent string with same hash key). Return - * the previous length of the hash chain. - * If this file is compiled with -DFASTEST, the compression level is forced - * to 1, and no hash chains are maintained. - * IN assertion: all calls to INSERT_STRING are made with consecutive input - * characters and the first MIN_MATCH bytes of str are valid (except for - * the last MIN_MATCH-1 bytes of the input file). - */ -#ifdef FASTEST -#define INSERT_STRING(s, str, match_head) \ - (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ - match_head = s->head[s->ins_h], \ - s->head[s->ins_h] = (Pos)(str)) -#else -#define INSERT_STRING(s, str, match_head) \ - (UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH-1)]), \ - match_head = s->prev[(str) & s->w_mask] = s->head[s->ins_h], \ - s->head[s->ins_h] = (Pos)(str)) -#endif - -/* =========================================================================== - * Initialize the hash table (avoiding 64K overflow for 16 bit systems). - * prev[] will be initialized on the fly. - */ -#define CLEAR_HASH(s) \ - s->head[s->hash_size-1] = NIL; \ - zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head)); - -/* =========================================================================== - * Slide the hash table when sliding the window down (could be avoided with 32 - * bit values at the expense of memory usage). We slide even when level == 0 to - * keep the hash table consistent if we switch back to level > 0 later. - */ -local void slide_hash(s) - deflate_state *s; -{ - unsigned n, m; - Posf *p; - uInt wsize = s->w_size; - - n = s->hash_size; - p = &s->head[n]; - do { - m = *--p; - *p = (Pos)(m >= wsize ? m - wsize : NIL); - } while (--n); - n = wsize; -#ifndef FASTEST - p = &s->prev[n]; - do { - m = *--p; - *p = (Pos)(m >= wsize ? m - wsize : NIL); - /* If n is not on any hash chain, prev[n] is garbage but - * its value will never be used. - */ - } while (--n); -#endif -} - -/* ========================================================================= */ -int ZEXPORT deflateInit_(strm, level, version, stream_size) - z_streamp strm; - int level; - const char *version; - int stream_size; -{ - return deflateInit2_(strm, level, Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL, - Z_DEFAULT_STRATEGY, version, stream_size); - /* To do: ignore strm->next_in if we use it as window */ -} - -/* ========================================================================= */ -int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy, - version, stream_size) - z_streamp strm; - int level; - int method; - int windowBits; - int memLevel; - int strategy; - const char *version; - int stream_size; -{ - deflate_state *s; - int wrap = 1; - static const char my_version[] = ZLIB_VERSION; - - ushf *overlay; - /* We overlay pending_buf and d_buf+l_buf. This works since the average - * output size for (length,distance) codes is <= 24 bits.
- */ - - if (version == Z_NULL || version[0] != my_version[0] || - stream_size != sizeof(z_stream)) { - return Z_VERSION_ERROR; - } - if (strm == Z_NULL) return Z_STREAM_ERROR; - - strm->msg = Z_NULL; - if (strm->zalloc == (alloc_func)0) { -#ifdef Z_SOLO - return Z_STREAM_ERROR; -#else - strm->zalloc = zcalloc; - strm->opaque = (voidpf)0; -#endif - } - if (strm->zfree == (free_func)0) -#ifdef Z_SOLO - return Z_STREAM_ERROR; -#else - strm->zfree = zcfree; -#endif - -#ifdef FASTEST - if (level != 0) level = 1; -#else - if (level == Z_DEFAULT_COMPRESSION) level = 6; -#endif - - if (windowBits < 0) { /* suppress zlib wrapper */ - wrap = 0; - windowBits = -windowBits; - } -#ifdef GZIP - else if (windowBits > 15) { - wrap = 2; /* write gzip wrapper instead */ - windowBits -= 16; - } -#endif - if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED || - windowBits < 8 || windowBits > 15 || level < 0 || level > 9 || - strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) { - return Z_STREAM_ERROR; - } - if (windowBits == 8) windowBits = 9; /* until 256-byte window bug fixed */ - s = (deflate_state *) ZALLOC(strm, 1, sizeof(deflate_state)); - if (s == Z_NULL) return Z_MEM_ERROR; - strm->state = (struct internal_state FAR *)s; - s->strm = strm; - s->status = INIT_STATE; /* to pass state test in deflateReset() */ - - s->wrap = wrap; - s->gzhead = Z_NULL; - s->w_bits = (uInt)windowBits; - s->w_size = 1 << s->w_bits; - s->w_mask = s->w_size - 1; - - s->hash_bits = (uInt)memLevel + 7; - s->hash_size = 1 << s->hash_bits; - s->hash_mask = s->hash_size - 1; - s->hash_shift = ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH); - - s->window = (Bytef *) ZALLOC(strm, s->w_size, 2*sizeof(Byte)); - s->prev = (Posf *) ZALLOC(strm, s->w_size, sizeof(Pos)); - s->head = (Posf *) ZALLOC(strm, s->hash_size, sizeof(Pos)); - - s->high_water = 0; /* nothing written to s->window yet */ - - s->lit_bufsize = 1 << (memLevel + 6); /* 16K elements by default */ - - overlay = (ushf *) ZALLOC(strm, s->lit_bufsize, sizeof(ush)+2); - s->pending_buf = (uchf *) overlay; - s->pending_buf_size = (ulg)s->lit_bufsize * (sizeof(ush)+2L); - - if (s->window == Z_NULL || s->prev == Z_NULL || s->head == Z_NULL || - s->pending_buf == Z_NULL) { - s->status = FINISH_STATE; - strm->msg = ERR_MSG(Z_MEM_ERROR); - deflateEnd (strm); - return Z_MEM_ERROR; - } - s->d_buf = overlay + s->lit_bufsize/sizeof(ush); - s->l_buf = s->pending_buf + (1+sizeof(ush))*s->lit_bufsize; - - s->level = level; - s->strategy = strategy; - s->method = (Byte)method; - - return deflateReset(strm); -} - -/* ========================================================================= - * Check for a valid deflate stream state. Return 0 if ok, 1 if not. 
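As a rough cross-check on the four ZALLOC calls above, the memory deflateInit2_ asks for can be estimated from windowBits and memLevel alone. A back-of-the-envelope sketch (assumes 2-byte Pos/ush, ignores sizeof(deflate_state); illustrative only):

#include <stddef.h>

static size_t deflate_mem_estimate(int windowBits, int memLevel)
{
    size_t w_size      = (size_t)1 << windowBits;     /* s->w_size      */
    size_t hash_size   = (size_t)1 << (memLevel + 7); /* s->hash_size   */
    size_t lit_bufsize = (size_t)1 << (memLevel + 6); /* s->lit_bufsize */

    return w_size * 2            /* window: w_size * 2*sizeof(Byte)  */
         + w_size * 2            /* prev[]: w_size * sizeof(Pos)     */
         + hash_size * 2         /* head[]: hash_size * sizeof(Pos)  */
         + lit_bufsize * 4;      /* overlay: lit_bufsize * (ush + 2) */
}

For the defaults (windowBits 15, memLevel 8) this comes to roughly 256 KiB per stream.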
- */ -local int deflateStateCheck (strm) - z_streamp strm; -{ - deflate_state *s; - if (strm == Z_NULL || - strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) - return 1; - s = strm->state; - if (s == Z_NULL || s->strm != strm || (s->status != INIT_STATE && -#ifdef GZIP - s->status != GZIP_STATE && -#endif - s->status != EXTRA_STATE && - s->status != NAME_STATE && - s->status != COMMENT_STATE && - s->status != HCRC_STATE && - s->status != BUSY_STATE && - s->status != FINISH_STATE)) - return 1; - return 0; -} - -/* ========================================================================= */ -int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength) - z_streamp strm; - const Bytef *dictionary; - uInt dictLength; -{ - deflate_state *s; - uInt str, n; - int wrap; - unsigned avail; - z_const unsigned char *next; - - if (deflateStateCheck(strm) || dictionary == Z_NULL) - return Z_STREAM_ERROR; - s = strm->state; - wrap = s->wrap; - if (wrap == 2 || (wrap == 1 && s->status != INIT_STATE) || s->lookahead) - return Z_STREAM_ERROR; - - /* when using zlib wrappers, compute Adler-32 for provided dictionary */ - if (wrap == 1) - strm->adler = adler32(strm->adler, dictionary, dictLength); - s->wrap = 0; /* avoid computing Adler-32 in read_buf */ - - /* if dictionary would fill window, just replace the history */ - if (dictLength >= s->w_size) { - if (wrap == 0) { /* already empty otherwise */ - CLEAR_HASH(s); - s->strstart = 0; - s->block_start = 0L; - s->insert = 0; - } - dictionary += dictLength - s->w_size; /* use the tail */ - dictLength = s->w_size; - } - - /* insert dictionary into window and hash */ - avail = strm->avail_in; - next = strm->next_in; - strm->avail_in = dictLength; - strm->next_in = (z_const Bytef *)dictionary; - fill_window(s); - while (s->lookahead >= MIN_MATCH) { - str = s->strstart; - n = s->lookahead - (MIN_MATCH-1); - do { - UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); -#ifndef FASTEST - s->prev[str & s->w_mask] = s->head[s->ins_h]; -#endif - s->head[s->ins_h] = (Pos)str; - str++; - } while (--n); - s->strstart = str; - s->lookahead = MIN_MATCH-1; - fill_window(s); - } - s->strstart += s->lookahead; - s->block_start = (long)s->strstart; - s->insert = s->lookahead; - s->lookahead = 0; - s->match_length = s->prev_length = MIN_MATCH-1; - s->match_available = 0; - strm->next_in = next; - strm->avail_in = avail; - s->wrap = wrap; - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflateGetDictionary (strm, dictionary, dictLength) - z_streamp strm; - Bytef *dictionary; - uInt *dictLength; -{ - deflate_state *s; - uInt len; - - if (deflateStateCheck(strm)) - return Z_STREAM_ERROR; - s = strm->state; - len = s->strstart + s->lookahead; - if (len > s->w_size) - len = s->w_size; - if (dictionary != Z_NULL && len) - zmemcpy(dictionary, s->window + s->strstart + s->lookahead - len, len); - if (dictLength != Z_NULL) - *dictLength = len; - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflateResetKeep (strm) - z_streamp strm; -{ - deflate_state *s; - - if (deflateStateCheck(strm)) { - return Z_STREAM_ERROR; - } - - strm->total_in = strm->total_out = 0; - strm->msg = Z_NULL; /* use zfree if we ever allocate msg dynamically */ - strm->data_type = Z_UNKNOWN; - - s = (deflate_state *)strm->state; - s->pending = 0; - s->pending_out = s->pending_buf; - - if (s->wrap < 0) { - s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); 
*/ - } - s->status = -#ifdef GZIP - s->wrap == 2 ? GZIP_STATE : -#endif - s->wrap ? INIT_STATE : BUSY_STATE; - strm->adler = -#ifdef GZIP - s->wrap == 2 ? crc32(0L, Z_NULL, 0) : -#endif - adler32(0L, Z_NULL, 0); - s->last_flush = Z_NO_FLUSH; - - _tr_init(s); - - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflateReset (strm) - z_streamp strm; -{ - int ret; - - ret = deflateResetKeep(strm); - if (ret == Z_OK) - lm_init(strm->state); - return ret; -} - -/* ========================================================================= */ -int ZEXPORT deflateSetHeader (strm, head) - z_streamp strm; - gz_headerp head; -{ - if (deflateStateCheck(strm) || strm->state->wrap != 2) - return Z_STREAM_ERROR; - strm->state->gzhead = head; - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflatePending (strm, pending, bits) - unsigned *pending; - int *bits; - z_streamp strm; -{ - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - if (pending != Z_NULL) - *pending = strm->state->pending; - if (bits != Z_NULL) - *bits = strm->state->bi_valid; - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflatePrime (strm, bits, value) - z_streamp strm; - int bits; - int value; -{ - deflate_state *s; - int put; - - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - s = strm->state; - if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3)) - return Z_BUF_ERROR; - do { - put = Buf_size - s->bi_valid; - if (put > bits) - put = bits; - s->bi_buf |= (ush)((value & ((1 << put) - 1)) << s->bi_valid); - s->bi_valid += put; - _tr_flush_bits(s); - value >>= put; - bits -= put; - } while (bits); - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflateParams(strm, level, strategy) - z_streamp strm; - int level; - int strategy; -{ - deflate_state *s; - compress_func func; - - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - s = strm->state; - -#ifdef FASTEST - if (level != 0) level = 1; -#else - if (level == Z_DEFAULT_COMPRESSION) level = 6; -#endif - if (level < 0 || level > 9 || strategy < 0 || strategy > Z_FIXED) { - return Z_STREAM_ERROR; - } - func = configuration_table[s->level].func; - - if ((strategy != s->strategy || func != configuration_table[level].func) && - s->high_water) { - /* Flush the last buffer: */ - int err = deflate(strm, Z_BLOCK); - if (err == Z_STREAM_ERROR) - return err; - if (strm->avail_out == 0) - return Z_BUF_ERROR; - } - if (s->level != level) { - if (s->level == 0 && s->matches != 0) { - if (s->matches == 1) - slide_hash(s); - else - CLEAR_HASH(s); - s->matches = 0; - } - s->level = level; - s->max_lazy_match = configuration_table[level].max_lazy; - s->good_match = configuration_table[level].good_length; - s->nice_match = configuration_table[level].nice_length; - s->max_chain_length = configuration_table[level].max_chain; - } - s->strategy = strategy; - return Z_OK; -} - -/* ========================================================================= */ -int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain) - z_streamp strm; - int good_length; - int max_lazy; - int nice_length; - int max_chain; -{ - deflate_state *s; - - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - s = strm->state; - s->good_match = (uInt)good_length; - s->max_lazy_match = (uInt)max_lazy; - s->nice_match = nice_length; - 
s->max_chain_length = (uInt)max_chain; - return Z_OK; -} - -/* ========================================================================= - * For the default windowBits of 15 and memLevel of 8, this function returns - * a close to exact, as well as small, upper bound on the compressed size. - * They are coded as constants here for a reason--if the #define's are - * changed, then this function needs to be changed as well. The return - * value for 15 and 8 only works for those exact settings. - * - * For any setting other than those defaults for windowBits and memLevel, - * the value returned is a conservative worst case for the maximum expansion - * resulting from using fixed blocks instead of stored blocks, which deflate - * can emit on compressed data for some combinations of the parameters. - * - * This function could be more sophisticated to provide closer upper bounds for - * every combination of windowBits and memLevel. But even the conservative - * upper bound of about 14% expansion does not seem onerous for output buffer - * allocation. - */ -uLong ZEXPORT deflateBound(strm, sourceLen) - z_streamp strm; - uLong sourceLen; -{ - deflate_state *s; - uLong complen, wraplen; - - /* conservative upper bound for compressed data */ - complen = sourceLen + - ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5; - - /* if can't get parameters, return conservative bound plus zlib wrapper */ - if (deflateStateCheck(strm)) - return complen + 6; - - /* compute wrapper length */ - s = strm->state; - switch (s->wrap) { - case 0: /* raw deflate */ - wraplen = 0; - break; - case 1: /* zlib wrapper */ - wraplen = 6 + (s->strstart ? 4 : 0); - break; -#ifdef GZIP - case 2: /* gzip wrapper */ - wraplen = 18; - if (s->gzhead != Z_NULL) { /* user-supplied gzip header */ - Bytef *str; - if (s->gzhead->extra != Z_NULL) - wraplen += 2 + s->gzhead->extra_len; - str = s->gzhead->name; - if (str != Z_NULL) - do { - wraplen++; - } while (*str++); - str = s->gzhead->comment; - if (str != Z_NULL) - do { - wraplen++; - } while (*str++); - if (s->gzhead->hcrc) - wraplen += 2; - } - break; -#endif - default: /* for compiler happiness */ - wraplen = 6; - } - - /* if not default parameters, return conservative bound */ - if (s->w_bits != 15 || s->hash_bits != 8 + 7) - return complen + wraplen; - - /* default settings: return tight bound for that case */ - return sourceLen + (sourceLen >> 12) + (sourceLen >> 14) + - (sourceLen >> 25) + 13 - 6 + wraplen; -} - -/* ========================================================================= - * Put a short in the pending buffer. The 16-bit value is put in MSB order. - * IN assertion: the stream state is correct and there is enough room in - * pending_buf. - */ -local void putShortMSB (s, b) - deflate_state *s; - uInt b; -{ - put_byte(s, (Byte)(b >> 8)); - put_byte(s, (Byte)(b & 0xff)); -} - -/* ========================================================================= - * Flush as much pending output as possible. All deflate() output, except for - * some deflate_stored() output, goes through this function so some - * applications may wish to modify it to avoid allocating a large - * strm->next_out buffer and copying into it. (See also read_buf()). 
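Callers normally use this bound to size the destination buffer before a one-shot compression. A minimal sketch against the public zlib API (compressBound() has the same shape as the tight bound above; error handling trimmed):

#include <stdlib.h>
#include <zlib.h>

static int compress_with_bound(const unsigned char *src, uLong src_len,
                               unsigned char **out, uLongf *out_len)
{
    uLong bound = compressBound(src_len); /* worst-case output size */
    *out = malloc(bound);
    if (*out == NULL) return Z_MEM_ERROR;
    *out_len = bound;
    return compress(*out, out_len, src, src_len); /* shrinks *out_len */
}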
- */ -local void flush_pending(strm) - z_streamp strm; -{ - unsigned len; - deflate_state *s = strm->state; - - _tr_flush_bits(s); - len = s->pending; - if (len > strm->avail_out) len = strm->avail_out; - if (len == 0) return; - - zmemcpy(strm->next_out, s->pending_out, len); - strm->next_out += len; - s->pending_out += len; - strm->total_out += len; - strm->avail_out -= len; - s->pending -= len; - if (s->pending == 0) { - s->pending_out = s->pending_buf; - } -} - -/* =========================================================================== - * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1]. - */ -#define HCRC_UPDATE(beg) \ - do { \ - if (s->gzhead->hcrc && s->pending > (beg)) \ - strm->adler = crc32(strm->adler, s->pending_buf + (beg), \ - s->pending - (beg)); \ - } while (0) - -/* ========================================================================= */ -int ZEXPORT deflate (strm, flush) - z_streamp strm; - int flush; -{ - int old_flush; /* value of flush param for previous deflate call */ - deflate_state *s; - - if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) { - return Z_STREAM_ERROR; - } - s = strm->state; - - if (strm->next_out == Z_NULL || - (strm->avail_in != 0 && strm->next_in == Z_NULL) || - (s->status == FINISH_STATE && flush != Z_FINISH)) { - ERR_RETURN(strm, Z_STREAM_ERROR); - } - if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR); - - old_flush = s->last_flush; - s->last_flush = flush; - - /* Flush as much pending output as possible */ - if (s->pending != 0) { - flush_pending(strm); - if (strm->avail_out == 0) { - /* Since avail_out is 0, deflate will be called again with - * more output space, but possibly with both pending and - * avail_in equal to zero. There won't be anything to do, - * but this is not an error situation so make sure we - * return OK instead of BUF_ERROR at next call of deflate: - */ - s->last_flush = -1; - return Z_OK; - } - - /* Make sure there is something to do and avoid duplicate consecutive - * flushes. For repeated and useless calls with Z_FINISH, we keep - * returning Z_STREAM_END instead of Z_BUF_ERROR. 
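The `last_flush = -1` bookkeeping above is what lets the canonical drain loop call deflate() again with no new input without tripping Z_BUF_ERROR. A sketch of that loop in the style of zlib's own zpipe.c; CHUNK, the FILE-based source and sink, and the function name are illustrative:

    /* Illustrative drain loop: feed CHUNK bytes at a time and call
     * deflate() repeatedly until it stops filling outbuf.
     */
    #include <stdio.h>
    #include <zlib.h>

    #define CHUNK 16384

    int stream_deflate(z_streamp strm, FILE *in, FILE *out)
    {
        unsigned char inbuf[CHUNK], outbuf[CHUNK];
        int flush, ret;

        do {
            strm->avail_in = (uInt)fread(inbuf, 1, CHUNK, in);
            if (ferror(in))
                return Z_ERRNO;
            strm->next_in = inbuf;
            flush = feof(in) ? Z_FINISH : Z_NO_FLUSH;

            do {  /* run deflate() until outbuf is no longer filled */
                strm->next_out  = outbuf;
                strm->avail_out = CHUNK;
                ret = deflate(strm, flush);
                fwrite(outbuf, 1, CHUNK - strm->avail_out, out);
            } while (strm->avail_out == 0);
        } while (flush != Z_FINISH);

        return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
    }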
- */ - } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) && - flush != Z_FINISH) { - ERR_RETURN(strm, Z_BUF_ERROR); - } - - /* User must not provide more input after the first FINISH: */ - if (s->status == FINISH_STATE && strm->avail_in != 0) { - ERR_RETURN(strm, Z_BUF_ERROR); - } - - /* Write the header */ - if (s->status == INIT_STATE) { - /* zlib header */ - uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8; - uInt level_flags; - - if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) - level_flags = 0; - else if (s->level < 6) - level_flags = 1; - else if (s->level == 6) - level_flags = 2; - else - level_flags = 3; - header |= (level_flags << 6); - if (s->strstart != 0) header |= PRESET_DICT; - header += 31 - (header % 31); - - putShortMSB(s, header); - - /* Save the adler32 of the preset dictionary: */ - if (s->strstart != 0) { - putShortMSB(s, (uInt)(strm->adler >> 16)); - putShortMSB(s, (uInt)(strm->adler & 0xffff)); - } - strm->adler = adler32(0L, Z_NULL, 0); - s->status = BUSY_STATE; - - /* Compression must start with an empty pending buffer */ - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - } -#ifdef GZIP - if (s->status == GZIP_STATE) { - /* gzip header */ - strm->adler = crc32(0L, Z_NULL, 0); - put_byte(s, 31); - put_byte(s, 139); - put_byte(s, 8); - if (s->gzhead == Z_NULL) { - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, 0); - put_byte(s, s->level == 9 ? 2 : - (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? - 4 : 0)); - put_byte(s, OS_CODE); - s->status = BUSY_STATE; - - /* Compression must start with an empty pending buffer */ - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - } - else { - put_byte(s, (s->gzhead->text ? 1 : 0) + - (s->gzhead->hcrc ? 2 : 0) + - (s->gzhead->extra == Z_NULL ? 0 : 4) + - (s->gzhead->name == Z_NULL ? 0 : 8) + - (s->gzhead->comment == Z_NULL ? 0 : 16) - ); - put_byte(s, (Byte)(s->gzhead->time & 0xff)); - put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff)); - put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff)); - put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff)); - put_byte(s, s->level == 9 ? 2 : - (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ? 
- 4 : 0)); - put_byte(s, s->gzhead->os & 0xff); - if (s->gzhead->extra != Z_NULL) { - put_byte(s, s->gzhead->extra_len & 0xff); - put_byte(s, (s->gzhead->extra_len >> 8) & 0xff); - } - if (s->gzhead->hcrc) - strm->adler = crc32(strm->adler, s->pending_buf, - s->pending); - s->gzindex = 0; - s->status = EXTRA_STATE; - } - } - if (s->status == EXTRA_STATE) { - if (s->gzhead->extra != Z_NULL) { - ulg beg = s->pending; /* start of bytes to update crc */ - uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex; - while (s->pending + left > s->pending_buf_size) { - uInt copy = s->pending_buf_size - s->pending; - zmemcpy(s->pending_buf + s->pending, - s->gzhead->extra + s->gzindex, copy); - s->pending = s->pending_buf_size; - HCRC_UPDATE(beg); - s->gzindex += copy; - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - beg = 0; - left -= copy; - } - zmemcpy(s->pending_buf + s->pending, - s->gzhead->extra + s->gzindex, left); - s->pending += left; - HCRC_UPDATE(beg); - s->gzindex = 0; - } - s->status = NAME_STATE; - } - if (s->status == NAME_STATE) { - if (s->gzhead->name != Z_NULL) { - ulg beg = s->pending; /* start of bytes to update crc */ - int val; - do { - if (s->pending == s->pending_buf_size) { - HCRC_UPDATE(beg); - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - beg = 0; - } - val = s->gzhead->name[s->gzindex++]; - put_byte(s, val); - } while (val != 0); - HCRC_UPDATE(beg); - s->gzindex = 0; - } - s->status = COMMENT_STATE; - } - if (s->status == COMMENT_STATE) { - if (s->gzhead->comment != Z_NULL) { - ulg beg = s->pending; /* start of bytes to update crc */ - int val; - do { - if (s->pending == s->pending_buf_size) { - HCRC_UPDATE(beg); - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - beg = 0; - } - val = s->gzhead->comment[s->gzindex++]; - put_byte(s, val); - } while (val != 0); - HCRC_UPDATE(beg); - } - s->status = HCRC_STATE; - } - if (s->status == HCRC_STATE) { - if (s->gzhead->hcrc) { - if (s->pending + 2 > s->pending_buf_size) { - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - } - put_byte(s, (Byte)(strm->adler & 0xff)); - put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); - strm->adler = crc32(0L, Z_NULL, 0); - } - s->status = BUSY_STATE; - - /* Compression must start with an empty pending buffer */ - flush_pending(strm); - if (s->pending != 0) { - s->last_flush = -1; - return Z_OK; - } - } -#endif - - /* Start a new block or continue the current one. - */ - if (strm->avail_in != 0 || s->lookahead != 0 || - (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) { - block_state bstate; - - bstate = s->level == 0 ? deflate_stored(s, flush) : - s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) : - s->strategy == Z_RLE ? deflate_rle(s, flush) : - (*(configuration_table[s->level].func))(s, flush); - - if (bstate == finish_started || bstate == finish_done) { - s->status = FINISH_STATE; - } - if (bstate == need_more || bstate == finish_started) { - if (strm->avail_out == 0) { - s->last_flush = -1; /* avoid BUF_ERROR next call, see above */ - } - return Z_OK; - /* If flush != Z_NO_FLUSH && avail_out == 0, the next call - * of deflate should use the same flush parameter to make sure - * that the flush is complete. So we don't have to output an - * empty block here, this will be done at next call. This also - * ensures that for a very small output buffer, we emit at most - * one empty block. 
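A caller-side sketch of how the gzip header fields written above get populated: open a gzip-wrapped stream (windowBits 15 + 16) and attach a gz_header before the first deflate() call. The file name and function are illustrative; the gz_header object must stay valid until the header has been written.

    /* Illustrative setup: gzip wrapper plus a caller-supplied header. */
    #include <string.h>
    #include <time.h>
    #include <zlib.h>

    int init_gzip_stream(z_stream *strm, gz_header *head)
    {
        memset(strm, 0, sizeof(*strm));
        if (deflateInit2(strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                         15 + 16 /* gzip wrapper */, 8,
                         Z_DEFAULT_STRATEGY) != Z_OK)
            return Z_STREAM_ERROR;

        memset(head, 0, sizeof(*head));
        head->time = (uLong)time(NULL);       /* MTIME field */
        head->os   = 3;                       /* Unix, per RFC 1952 */
        head->name = (Bytef *)"data.txt";     /* NUL-terminated, illustrative */
        return deflateSetHeader(strm, head);  /* before the first deflate() */
    }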
- */ - } - if (bstate == block_done) { - if (flush == Z_PARTIAL_FLUSH) { - _tr_align(s); - } else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */ - _tr_stored_block(s, (char*)0, 0L, 0); - /* For a full flush, this empty block will be recognized - * as a special marker by inflate_sync(). - */ - if (flush == Z_FULL_FLUSH) { - CLEAR_HASH(s); /* forget history */ - if (s->lookahead == 0) { - s->strstart = 0; - s->block_start = 0L; - s->insert = 0; - } - } - } - flush_pending(strm); - if (strm->avail_out == 0) { - s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */ - return Z_OK; - } - } - } - - if (flush != Z_FINISH) return Z_OK; - if (s->wrap <= 0) return Z_STREAM_END; - - /* Write the trailer */ -#ifdef GZIP - if (s->wrap == 2) { - put_byte(s, (Byte)(strm->adler & 0xff)); - put_byte(s, (Byte)((strm->adler >> 8) & 0xff)); - put_byte(s, (Byte)((strm->adler >> 16) & 0xff)); - put_byte(s, (Byte)((strm->adler >> 24) & 0xff)); - put_byte(s, (Byte)(strm->total_in & 0xff)); - put_byte(s, (Byte)((strm->total_in >> 8) & 0xff)); - put_byte(s, (Byte)((strm->total_in >> 16) & 0xff)); - put_byte(s, (Byte)((strm->total_in >> 24) & 0xff)); - } - else -#endif - { - putShortMSB(s, (uInt)(strm->adler >> 16)); - putShortMSB(s, (uInt)(strm->adler & 0xffff)); - } - flush_pending(strm); - /* If avail_out is zero, the application will call deflate again - * to flush the rest. - */ - if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */ - return s->pending != 0 ? Z_OK : Z_STREAM_END; -} - -/* ========================================================================= */ -int ZEXPORT deflateEnd (strm) - z_streamp strm; -{ - int status; - - if (deflateStateCheck(strm)) return Z_STREAM_ERROR; - - status = strm->state->status; - - /* Deallocate in reverse order of allocations: */ - TRY_FREE(strm, strm->state->pending_buf); - TRY_FREE(strm, strm->state->head); - TRY_FREE(strm, strm->state->prev); - TRY_FREE(strm, strm->state->window); - - ZFREE(strm, strm->state); - strm->state = Z_NULL; - - return status == BUSY_STATE ? Z_DATA_ERROR : Z_OK; -} - -/* ========================================================================= - * Copy the source state to the destination state. - * To simplify the source, this is not supported for 16-bit MS-DOS (which - * doesn't have enough memory anyway to duplicate compression states). 
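A small checker illustrating the wrapper bytes produced above, assuming the RFC 1950 layout: the first two bytes form a 16-bit value divisible by 31 (which is what `header += 31 - (header % 31)` guarantees), and the stream ends with the big-endian adler32 of the raw input. Function and parameter names are illustrative.

    #include <zlib.h>

    int looks_like_zlib(const unsigned char *buf, unsigned long len,
                        const unsigned char *raw, unsigned long raw_len)
    {
        if (len < 6)
            return 0;
        unsigned hdr = ((unsigned)buf[0] << 8) | buf[1];
        if (hdr % 31 != 0 || (buf[0] & 0x0f) != Z_DEFLATED)
            return 0;
        uLong a = adler32(adler32(0L, Z_NULL, 0), raw, (uInt)raw_len);
        uLong t = ((uLong)buf[len - 4] << 24) | ((uLong)buf[len - 3] << 16) |
                  ((uLong)buf[len - 2] << 8)  |  (uLong)buf[len - 1];
        return a == t;   /* trailer matches the recomputed checksum */
    }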
- */ -int ZEXPORT deflateCopy (dest, source) - z_streamp dest; - z_streamp source; -{ -#ifdef MAXSEG_64K - return Z_STREAM_ERROR; -#else - deflate_state *ds; - deflate_state *ss; - ushf *overlay; - - - if (deflateStateCheck(source) || dest == Z_NULL) { - return Z_STREAM_ERROR; - } - - ss = source->state; - - zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); - - ds = (deflate_state *) ZALLOC(dest, 1, sizeof(deflate_state)); - if (ds == Z_NULL) return Z_MEM_ERROR; - dest->state = (struct internal_state FAR *) ds; - zmemcpy((voidpf)ds, (voidpf)ss, sizeof(deflate_state)); - ds->strm = dest; - - ds->window = (Bytef *) ZALLOC(dest, ds->w_size, 2*sizeof(Byte)); - ds->prev = (Posf *) ZALLOC(dest, ds->w_size, sizeof(Pos)); - ds->head = (Posf *) ZALLOC(dest, ds->hash_size, sizeof(Pos)); - overlay = (ushf *) ZALLOC(dest, ds->lit_bufsize, sizeof(ush)+2); - ds->pending_buf = (uchf *) overlay; - - if (ds->window == Z_NULL || ds->prev == Z_NULL || ds->head == Z_NULL || - ds->pending_buf == Z_NULL) { - deflateEnd (dest); - return Z_MEM_ERROR; - } - /* following zmemcpy do not work for 16-bit MS-DOS */ - zmemcpy(ds->window, ss->window, ds->w_size * 2 * sizeof(Byte)); - zmemcpy((voidpf)ds->prev, (voidpf)ss->prev, ds->w_size * sizeof(Pos)); - zmemcpy((voidpf)ds->head, (voidpf)ss->head, ds->hash_size * sizeof(Pos)); - zmemcpy(ds->pending_buf, ss->pending_buf, (uInt)ds->pending_buf_size); - - ds->pending_out = ds->pending_buf + (ss->pending_out - ss->pending_buf); - ds->d_buf = overlay + ds->lit_bufsize/sizeof(ush); - ds->l_buf = ds->pending_buf + (1+sizeof(ush))*ds->lit_bufsize; - - ds->l_desc.dyn_tree = ds->dyn_ltree; - ds->d_desc.dyn_tree = ds->dyn_dtree; - ds->bl_desc.dyn_tree = ds->bl_tree; - - return Z_OK; -#endif /* MAXSEG_64K */ -} - -/* =========================================================================== - * Read a new buffer from the current input stream, update the adler32 - * and total number of bytes read. All deflate() input goes through - * this function so some applications may wish to modify it to avoid - * allocating a large strm->next_in buffer and copying from it. - * (See also flush_pending()). 
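A usage sketch for deflateCopy() as defined above: fork the compressor state to probe a flush without disturbing the original stream. The function is illustrative and error handling is trimmed.

    #include <zlib.h>

    int try_flush_on_copy(z_stream *orig, unsigned char *probe, uInt probe_len)
    {
        z_stream copy;
        int ret = deflateCopy(&copy, orig);   /* duplicates window and trees */
        if (ret != Z_OK)
            return ret;                       /* typically Z_MEM_ERROR */

        copy.next_out  = probe;
        copy.avail_out = probe_len;
        ret = deflate(&copy, Z_FULL_FLUSH);   /* `orig` is left untouched */

        deflateEnd(&copy);
        return ret;
    }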
- */ -local unsigned read_buf(strm, buf, size) - z_streamp strm; - Bytef *buf; - unsigned size; -{ - unsigned len = strm->avail_in; - - if (len > size) len = size; - if (len == 0) return 0; - - strm->avail_in -= len; - - zmemcpy(buf, strm->next_in, len); - if (strm->state->wrap == 1) { - strm->adler = adler32(strm->adler, buf, len); - } -#ifdef GZIP - else if (strm->state->wrap == 2) { - strm->adler = crc32(strm->adler, buf, len); - } -#endif - strm->next_in += len; - strm->total_in += len; - - return len; -} - -/* =========================================================================== - * Initialize the "longest match" routines for a new zlib stream - */ -local void lm_init (s) - deflate_state *s; -{ - s->window_size = (ulg)2L*s->w_size; - - CLEAR_HASH(s); - - /* Set the default configuration parameters: - */ - s->max_lazy_match = configuration_table[s->level].max_lazy; - s->good_match = configuration_table[s->level].good_length; - s->nice_match = configuration_table[s->level].nice_length; - s->max_chain_length = configuration_table[s->level].max_chain; - - s->strstart = 0; - s->block_start = 0L; - s->lookahead = 0; - s->insert = 0; - s->match_length = s->prev_length = MIN_MATCH-1; - s->match_available = 0; - s->ins_h = 0; -#ifndef FASTEST -#ifdef ASMV - match_init(); /* initialize the asm code */ -#endif -#endif -} - -#ifndef FASTEST -/* =========================================================================== - * Set match_start to the longest match starting at the given string and - * return its length. Matches shorter or equal to prev_length are discarded, - * in which case the result is equal to prev_length and match_start is - * garbage. - * IN assertions: cur_match is the head of the hash chain for the current - * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1 - * OUT assertion: the match length is not greater than s->lookahead. - */ -#ifndef ASMV -/* For 80x86 and 680x0, an optimized version will be provided in match.asm or - * match.S. The code will be functionally equivalent. - */ -local uInt longest_match(s, cur_match) - deflate_state *s; - IPos cur_match; /* current match */ -{ - unsigned chain_length = s->max_chain_length;/* max hash chain length */ - register Bytef *scan = s->window + s->strstart; /* current string */ - register Bytef *match; /* matched string */ - register int len; /* length of current match */ - int best_len = (int)s->prev_length; /* best match length so far */ - int nice_match = s->nice_match; /* stop if match long enough */ - IPos limit = s->strstart > (IPos)MAX_DIST(s) ? - s->strstart - (IPos)MAX_DIST(s) : NIL; - /* Stop when cur_match becomes <= limit. To simplify the code, - * we prevent matches with the string of window index 0. - */ - Posf *prev = s->prev; - uInt wmask = s->w_mask; - -#ifdef UNALIGNED_OK - /* Compare two bytes at a time. Note: this is not always beneficial. - * Try with and without -DUNALIGNED_OK to check. - */ - register Bytef *strend = s->window + s->strstart + MAX_MATCH - 1; - register ush scan_start = *(ushf*)scan; - register ush scan_end = *(ushf*)(scan+best_len-1); -#else - register Bytef *strend = s->window + s->strstart + MAX_MATCH; - register Byte scan_end1 = scan[best_len-1]; - register Byte scan_end = scan[best_len]; -#endif - - /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. - * It is easy to get rid of this optimization if necessary. 
- */ - Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); - - /* Do not waste too much time if we already have a good match: */ - if (s->prev_length >= s->good_match) { - chain_length >>= 2; - } - /* Do not look for matches beyond the end of the input. This is necessary - * to make deflate deterministic. - */ - if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead; - - Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); - - do { - Assert(cur_match < s->strstart, "no future"); - match = s->window + cur_match; - - /* Skip to next match if the match length cannot increase - * or if the match length is less than 2. Note that the checks below - * for insufficient lookahead only occur occasionally for performance - * reasons. Therefore uninitialized memory will be accessed, and - * conditional jumps will be made that depend on those values. - * However the length of the match is limited to the lookahead, so - * the output of deflate is not affected by the uninitialized values. - */ -#if (defined(UNALIGNED_OK) && MAX_MATCH == 258) - /* This code assumes sizeof(unsigned short) == 2. Do not use - * UNALIGNED_OK if your compiler uses a different size. - */ - if (*(ushf*)(match+best_len-1) != scan_end || - *(ushf*)match != scan_start) continue; - - /* It is not necessary to compare scan[2] and match[2] since they are - * always equal when the other bytes match, given that the hash keys - * are equal and that HASH_BITS >= 8. Compare 2 bytes at a time at - * strstart+3, +5, ... up to strstart+257. We check for insufficient - * lookahead only every 4th comparison; the 128th check will be made - * at strstart+257. If MAX_MATCH-2 is not a multiple of 8, it is - * necessary to put more guard bytes at the end of the window, or - * to check more often for insufficient lookahead. - */ - Assert(scan[2] == match[2], "scan[2]?"); - scan++, match++; - do { - } while (*(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - *(ushf*)(scan+=2) == *(ushf*)(match+=2) && - scan < strend); - /* The funny "do {}" generates better code on most compilers */ - - /* Here, scan <= window+strstart+257 */ - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); - if (*scan == *match) scan++; - - len = (MAX_MATCH - 1) - (int)(strend-scan); - scan = strend - (MAX_MATCH-1); - -#else /* UNALIGNED_OK */ - - if (match[best_len] != scan_end || - match[best_len-1] != scan_end1 || - *match != *scan || - *++match != scan[1]) continue; - - /* The check at best_len-1 can be removed because it will be made - * again later. (This heuristic is not always a win.) - * It is not necessary to compare scan[2] and match[2] since they - * are always equal when the other bytes match, given that - * the hash keys are equal and that HASH_BITS >= 8. - */ - scan += 2, match++; - Assert(*scan == *match, "match[2]?"); - - /* We check for insufficient lookahead only every 8th comparison; - * the 256th check will be made at strstart+258. 
- */ - do { - } while (*++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - scan < strend); - - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); - - len = MAX_MATCH - (int)(strend - scan); - scan = strend - MAX_MATCH; - -#endif /* UNALIGNED_OK */ - - if (len > best_len) { - s->match_start = cur_match; - best_len = len; - if (len >= nice_match) break; -#ifdef UNALIGNED_OK - scan_end = *(ushf*)(scan+best_len-1); -#else - scan_end1 = scan[best_len-1]; - scan_end = scan[best_len]; -#endif - } - } while ((cur_match = prev[cur_match & wmask]) > limit - && --chain_length != 0); - - if ((uInt)best_len <= s->lookahead) return (uInt)best_len; - return s->lookahead; -} -#endif /* ASMV */ - -#else /* FASTEST */ - -/* --------------------------------------------------------------------------- - * Optimized version for FASTEST only - */ -local uInt longest_match(s, cur_match) - deflate_state *s; - IPos cur_match; /* current match */ -{ - register Bytef *scan = s->window + s->strstart; /* current string */ - register Bytef *match; /* matched string */ - register int len; /* length of current match */ - register Bytef *strend = s->window + s->strstart + MAX_MATCH; - - /* The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. - * It is easy to get rid of this optimization if necessary. - */ - Assert(s->hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); - - Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead"); - - Assert(cur_match < s->strstart, "no future"); - - match = s->window + cur_match; - - /* Return failure if the match length is less than 2: - */ - if (match[0] != scan[0] || match[1] != scan[1]) return MIN_MATCH-1; - - /* The check at best_len-1 can be removed because it will be made - * again later. (This heuristic is not always a win.) - * It is not necessary to compare scan[2] and match[2] since they - * are always equal when the other bytes match, given that - * the hash keys are equal and that HASH_BITS >= 8. - */ - scan += 2, match += 2; - Assert(*scan == *match, "match[2]?"); - - /* We check for insufficient lookahead only every 8th comparison; - * the 256th check will be made at strstart+258. - */ - do { - } while (*++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - *++scan == *++match && *++scan == *++match && - scan < strend); - - Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan"); - - len = MAX_MATCH - (int)(strend - scan); - - if (len < MIN_MATCH) return MIN_MATCH - 1; - - s->match_start = cur_match; - return (uInt)len <= s->lookahead ? (uInt)len : s->lookahead; -} - -#endif /* FASTEST */ - -#ifdef ZLIB_DEBUG - -#define EQUAL 0 -/* result of memcmp for equal strings */ - -/* =========================================================================== - * Check that the match at match_start is indeed a match. 
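Stripped of the unrolling, alignment tricks, and chain-length heuristics above, the search reduces to the following shape. Types and parameters are illustrative, not zlib's; `chain` stands in for walking prev[] starting from head[].

    /* Walk candidates newest-to-oldest, keep the longest extension, and
     * stop early once a match is "nice" enough. The caller guarantees
     * every cand < strstart and at least `lookahead` readable bytes.
     */
    unsigned naive_longest_match(const unsigned char *win, unsigned strstart,
                                 unsigned lookahead, const unsigned *chain,
                                 unsigned chain_len, unsigned nice_len,
                                 unsigned *match_start)
    {
        unsigned best = 0;
        for (unsigned i = 0; i < chain_len; i++) {
            unsigned cand = chain[i];
            unsigned len = 0;
            while (len < lookahead && win[cand + len] == win[strstart + len])
                len++;
            if (len > best) {
                best = len;
                *match_start = cand;
                if (len >= nice_len)      /* good enough, stop searching */
                    break;
            }
        }
        return best;
    }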
- */ -local void check_match(s, start, match, length) - deflate_state *s; - IPos start, match; - int length; -{ - /* check that the match is indeed a match */ - if (zmemcmp(s->window + match, - s->window + start, length) != EQUAL) { - fprintf(stderr, " start %u, match %u, length %d\n", - start, match, length); - do { - fprintf(stderr, "%c%c", s->window[match++], s->window[start++]); - } while (--length != 0); - z_error("invalid match"); - } - if (z_verbose > 1) { - fprintf(stderr,"\\[%d,%d]", start-match, length); - do { putc(s->window[start++], stderr); } while (--length != 0); - } -} -#else -# define check_match(s, start, match, length) -#endif /* ZLIB_DEBUG */ - -/* =========================================================================== - * Fill the window when the lookahead becomes insufficient. - * Updates strstart and lookahead. - * - * IN assertion: lookahead < MIN_LOOKAHEAD - * OUT assertions: strstart <= window_size-MIN_LOOKAHEAD - * At least one byte has been read, or avail_in == 0; reads are - * performed for at least two bytes (required for the zip translate_eol - * option -- not supported here). - */ -local void fill_window(s) - deflate_state *s; -{ - unsigned n; - unsigned more; /* Amount of free space at the end of the window. */ - uInt wsize = s->w_size; - - Assert(s->lookahead < MIN_LOOKAHEAD, "already enough lookahead"); - - do { - more = (unsigned)(s->window_size -(ulg)s->lookahead -(ulg)s->strstart); - - /* Deal with !@#$% 64K limit: */ - if (sizeof(int) <= 2) { - if (more == 0 && s->strstart == 0 && s->lookahead == 0) { - more = wsize; - - } else if (more == (unsigned)(-1)) { - /* Very unlikely, but possible on 16 bit machine if - * strstart == 0 && lookahead == 1 (input done a byte at time) - */ - more--; - } - } - - /* If the window is almost full and there is insufficient lookahead, - * move the upper half to the lower one to make room in the upper half. - */ - if (s->strstart >= wsize+MAX_DIST(s)) { - - zmemcpy(s->window, s->window+wsize, (unsigned)wsize - more); - s->match_start -= wsize; - s->strstart -= wsize; /* we now have strstart >= MAX_DIST */ - s->block_start -= (long) wsize; - slide_hash(s); - more += wsize; - } - if (s->strm->avail_in == 0) break; - - /* If there was no sliding: - * strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 && - * more == window_size - lookahead - strstart - * => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1) - * => more >= window_size - 2*WSIZE + 2 - * In the BIG_MEM or MMAP case (not yet supported), - * window_size == input_size + MIN_LOOKAHEAD && - * strstart + s->lookahead <= input_size => more >= MIN_LOOKAHEAD. - * Otherwise, window_size == 2*WSIZE so more >= 2. - * If there was sliding, more >= WSIZE. So in all cases, more >= 2. 
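The slide_hash() call used above rebases every stored position after the window moves down by wsize. Conceptually it is just this loop, applied to both head[] and prev[] (NIL is 0 in deflate); the helper name is illustrative:

    static void slide_positions(unsigned short *tab, unsigned n, unsigned wsize)
    {
        for (unsigned i = 0; i < n; i++) {
            unsigned m = tab[i];
            /* positions older than the window fall out to NIL (0) */
            tab[i] = (unsigned short)(m >= wsize ? m - wsize : 0);
        }
    }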
- */ - Assert(more >= 2, "more < 2"); - - n = read_buf(s->strm, s->window + s->strstart + s->lookahead, more); - s->lookahead += n; - - /* Initialize the hash value now that we have some input: */ - if (s->lookahead + s->insert >= MIN_MATCH) { - uInt str = s->strstart - s->insert; - s->ins_h = s->window[str]; - UPDATE_HASH(s, s->ins_h, s->window[str + 1]); -#if MIN_MATCH != 3 - Call UPDATE_HASH() MIN_MATCH-3 more times -#endif - while (s->insert) { - UPDATE_HASH(s, s->ins_h, s->window[str + MIN_MATCH-1]); -#ifndef FASTEST - s->prev[str & s->w_mask] = s->head[s->ins_h]; -#endif - s->head[s->ins_h] = (Pos)str; - str++; - s->insert--; - if (s->lookahead + s->insert < MIN_MATCH) - break; - } - } - /* If the whole input has less than MIN_MATCH bytes, ins_h is garbage, - * but this is not important since only literal bytes will be emitted. - */ - - } while (s->lookahead < MIN_LOOKAHEAD && s->strm->avail_in != 0); - - /* If the WIN_INIT bytes after the end of the current data have never been - * written, then zero those bytes in order to avoid memory check reports of - * the use of uninitialized (or uninitialised as Julian writes) bytes by - * the longest match routines. Update the high water mark for the next - * time through here. WIN_INIT is set to MAX_MATCH since the longest match - * routines allow scanning to strstart + MAX_MATCH, ignoring lookahead. - */ - if (s->high_water < s->window_size) { - ulg curr = s->strstart + (ulg)(s->lookahead); - ulg init; - - if (s->high_water < curr) { - /* Previous high water mark below current data -- zero WIN_INIT - * bytes or up to end of window, whichever is less. - */ - init = s->window_size - curr; - if (init > WIN_INIT) - init = WIN_INIT; - zmemzero(s->window + curr, (unsigned)init); - s->high_water = curr + init; - } - else if (s->high_water < (ulg)curr + WIN_INIT) { - /* High water mark at or above current data, but below current data - * plus WIN_INIT -- zero out to current data plus WIN_INIT, or up - * to end of window, whichever is less. - */ - init = (ulg)curr + WIN_INIT - s->high_water; - if (init > s->window_size - s->high_water) - init = s->window_size - s->high_water; - zmemzero(s->window + s->high_water, (unsigned)init); - s->high_water += init; - } - } - - Assert((ulg)s->strstart <= s->window_size - MIN_LOOKAHEAD, - "not enough room for search"); -} - -/* =========================================================================== - * Flush the current block, with given end-of-file flag. - * IN assertion: strstart is set to the end of the current match. - */ -#define FLUSH_BLOCK_ONLY(s, last) { \ - _tr_flush_block(s, (s->block_start >= 0L ? \ - (charf *)&s->window[(unsigned)s->block_start] : \ - (charf *)Z_NULL), \ - (ulg)((long)s->strstart - s->block_start), \ - (last)); \ - s->block_start = s->strstart; \ - flush_pending(s->strm); \ - Tracev((stderr,"[FLUSH]")); \ -} - -/* Same but force premature exit if necessary. */ -#define FLUSH_BLOCK(s, last) { \ - FLUSH_BLOCK_ONLY(s, last); \ - if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \ -} - -/* Maximum stored block length in deflate format (not including header). */ -#define MAX_STORED 65535 - -/* Minimum of a and b. */ -#define MIN(a, b) ((a) > (b) ? (b) : (a)) - -/* =========================================================================== - * Copy without compression as much as possible from the input stream, return - * the current block state. 
- * - * In case deflateParams() is used to later switch to a non-zero compression - * level, s->matches (otherwise unused when storing) keeps track of the number - * of hash table slides to perform. If s->matches is 1, then one hash table - * slide will be done when switching. If s->matches is 2, the maximum value - * allowed here, then the hash table will be cleared, since two or more slides - * is the same as a clear. - * - * deflate_stored() is written to minimize the number of times an input byte is - * copied. It is most efficient with large input and output buffers, which - * maximizes the opportunites to have a single copy from next_in to next_out. - */ -local block_state deflate_stored(s, flush) - deflate_state *s; - int flush; -{ - /* Smallest worthy block size when not flushing or finishing. By default - * this is 32K. This can be as small as 507 bytes for memLevel == 1. For - * large input and output buffers, the stored block size will be larger. - */ - unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size); - - /* Copy as many min_block or larger stored blocks directly to next_out as - * possible. If flushing, copy the remaining available input to next_out as - * stored blocks, if there is enough space. - */ - unsigned len, left, have, last = 0; - unsigned used = s->strm->avail_in; - do { - /* Set len to the maximum size block that we can copy directly with the - * available input data and output space. Set left to how much of that - * would be copied from what's left in the window. - */ - len = MAX_STORED; /* maximum deflate stored block length */ - have = (s->bi_valid + 42) >> 3; /* number of header bytes */ - if (s->strm->avail_out < have) /* need room for header */ - break; - /* maximum stored block length that will fit in avail_out: */ - have = s->strm->avail_out - have; - left = s->strstart - s->block_start; /* bytes left in window */ - if (len > (ulg)left + s->strm->avail_in) - len = left + s->strm->avail_in; /* limit len to the input */ - if (len > have) - len = have; /* limit len to the output */ - - /* If the stored block would be less than min_block in length, or if - * unable to copy all of the available input when flushing, then try - * copying to the window and the pending buffer instead. Also don't - * write an empty block when flushing -- deflate() does that. - */ - if (len < min_block && ((len == 0 && flush != Z_FINISH) || - flush == Z_NO_FLUSH || - len != left + s->strm->avail_in)) - break; - - /* Make a dummy stored block in pending to get the header bytes, - * including any pending bits. This also updates the debugging counts. - */ - last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0; - _tr_stored_block(s, (char *)0, 0L, last); - - /* Replace the lengths in the dummy stored block with len. */ - s->pending_buf[s->pending - 4] = len; - s->pending_buf[s->pending - 3] = len >> 8; - s->pending_buf[s->pending - 2] = ~len; - s->pending_buf[s->pending - 1] = ~len >> 8; - - /* Write the stored block header bytes. */ - flush_pending(s->strm); - -#ifdef ZLIB_DEBUG - /* Update debugging counts for the data about to be copied. */ - s->compressed_len += len << 3; - s->bits_sent += len << 3; -#endif - - /* Copy uncompressed bytes from the window to next_out. 
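The five header bytes patched above follow RFC 1951's stored-block layout (section 3.2.4). A standalone sketch, assuming the output is already byte-aligned (deflate pads to a byte boundary before LEN); names are illustrative:

    /* BFINAL/BTYPE bits, then LEN and NLEN = ~LEN, both little-endian,
     * then the raw bytes.
     */
    #include <string.h>

    size_t put_stored_block(unsigned char *out, const unsigned char *src,
                            unsigned len /* <= 65535 */, int last)
    {
        out[0] = (unsigned char)(last ? 1 : 0);   /* BFINAL=last, BTYPE=00 */
        out[1] = (unsigned char)(len & 0xff);
        out[2] = (unsigned char)(len >> 8);
        out[3] = (unsigned char)(~len & 0xff);
        out[4] = (unsigned char)((~len >> 8) & 0xff);
        memcpy(out + 5, src, len);
        return 5 + (size_t)len;
    }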
*/ - if (left) { - if (left > len) - left = len; - zmemcpy(s->strm->next_out, s->window + s->block_start, left); - s->strm->next_out += left; - s->strm->avail_out -= left; - s->strm->total_out += left; - s->block_start += left; - len -= left; - } - - /* Copy uncompressed bytes directly from next_in to next_out, updating - * the check value. - */ - if (len) { - read_buf(s->strm, s->strm->next_out, len); - s->strm->next_out += len; - s->strm->avail_out -= len; - s->strm->total_out += len; - } - } while (last == 0); - - /* Update the sliding window with the last s->w_size bytes of the copied - * data, or append all of the copied data to the existing window if less - * than s->w_size bytes were copied. Also update the number of bytes to - * insert in the hash tables, in the event that deflateParams() switches to - * a non-zero compression level. - */ - used -= s->strm->avail_in; /* number of input bytes directly copied */ - if (used) { - /* If any input was used, then no unused input remains in the window, - * therefore s->block_start == s->strstart. - */ - if (used >= s->w_size) { /* supplant the previous history */ - s->matches = 2; /* clear hash */ - zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size); - s->strstart = s->w_size; - } - else { - if (s->window_size - s->strstart <= used) { - /* Slide the window down. */ - s->strstart -= s->w_size; - zmemcpy(s->window, s->window + s->w_size, s->strstart); - if (s->matches < 2) - s->matches++; /* add a pending slide_hash() */ - } - zmemcpy(s->window + s->strstart, s->strm->next_in - used, used); - s->strstart += used; - } - s->block_start = s->strstart; - s->insert += MIN(used, s->w_size - s->insert); - } - if (s->high_water < s->strstart) - s->high_water = s->strstart; - - /* If the last block was written to next_out, then done. */ - if (last) - return finish_done; - - /* If flushing and all input has been consumed, then done. */ - if (flush != Z_NO_FLUSH && flush != Z_FINISH && - s->strm->avail_in == 0 && (long)s->strstart == s->block_start) - return block_done; - - /* Fill the window with any remaining input. */ - have = s->window_size - s->strstart - 1; - if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) { - /* Slide the window down. */ - s->block_start -= s->w_size; - s->strstart -= s->w_size; - zmemcpy(s->window, s->window + s->w_size, s->strstart); - if (s->matches < 2) - s->matches++; /* add a pending slide_hash() */ - have += s->w_size; /* more space now */ - } - if (have > s->strm->avail_in) - have = s->strm->avail_in; - if (have) { - read_buf(s->strm, s->window + s->strstart, have); - s->strstart += have; - } - if (s->high_water < s->strstart) - s->high_water = s->strstart; - - /* There was not enough avail_out to write a complete worthy or flushed - * stored block to next_out. Write a stored block to pending instead, if we - * have enough input for a worthy block, or if flushing and there is enough - * room for the remaining input as a stored block in the pending buffer. - */ - have = (s->bi_valid + 42) >> 3; /* number of header bytes */ - /* maximum stored block length that will fit in pending: */ - have = MIN(s->pending_buf_size - have, MAX_STORED); - min_block = MIN(have, s->w_size); - left = s->strstart - s->block_start; - if (left >= min_block || - ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH && - s->strm->avail_in == 0 && left <= have)) { - len = MIN(left, have); - last = flush == Z_FINISH && s->strm->avail_in == 0 && - len == left ? 
1 : 0; - _tr_stored_block(s, (charf *)s->window + s->block_start, len, last); - s->block_start += len; - flush_pending(s->strm); - } - - /* We've done all we can with the available input and output. */ - return last ? finish_started : need_more; -} - -/* =========================================================================== - * Compress as much as possible from the input stream, return the current - * block state. - * This function does not perform lazy evaluation of matches and inserts - * new strings in the dictionary only for unmatched strings or for short - * matches. It is used only for the fast compression options. - */ -local block_state deflate_fast(s, flush) - deflate_state *s; - int flush; -{ - IPos hash_head; /* head of the hash chain */ - int bflush; /* set if current block must be flushed */ - - for (;;) { - /* Make sure that we always have enough lookahead, except - * at the end of the input file. We need MAX_MATCH bytes - * for the next match, plus MIN_MATCH bytes to insert the - * string following the next match. - */ - if (s->lookahead < MIN_LOOKAHEAD) { - fill_window(s); - if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { - return need_more; - } - if (s->lookahead == 0) break; /* flush the current block */ - } - - /* Insert the string window[strstart .. strstart+2] in the - * dictionary, and set hash_head to the head of the hash chain: - */ - hash_head = NIL; - if (s->lookahead >= MIN_MATCH) { - INSERT_STRING(s, s->strstart, hash_head); - } - - /* Find the longest match, discarding those <= prev_length. - * At this point we have always match_length < MIN_MATCH - */ - if (hash_head != NIL && s->strstart - hash_head <= MAX_DIST(s)) { - /* To simplify the code, we prevent matches with the string - * of window index 0 (in particular we have to avoid a match - * of the string with itself at the start of the input file). - */ - s->match_length = longest_match (s, hash_head); - /* longest_match() sets match_start */ - } - if (s->match_length >= MIN_MATCH) { - check_match(s, s->strstart, s->match_start, s->match_length); - - _tr_tally_dist(s, s->strstart - s->match_start, - s->match_length - MIN_MATCH, bflush); - - s->lookahead -= s->match_length; - - /* Insert new strings in the hash table only if the match length - * is not too large. This saves time but degrades compression. - */ -#ifndef FASTEST - if (s->match_length <= s->max_insert_length && - s->lookahead >= MIN_MATCH) { - s->match_length--; /* string at strstart already in table */ - do { - s->strstart++; - INSERT_STRING(s, s->strstart, hash_head); - /* strstart never exceeds WSIZE-MAX_MATCH, so there are - * always MIN_MATCH bytes ahead. - */ - } while (--s->match_length != 0); - s->strstart++; - } else -#endif - { - s->strstart += s->match_length; - s->match_length = 0; - s->ins_h = s->window[s->strstart]; - UPDATE_HASH(s, s->ins_h, s->window[s->strstart+1]); -#if MIN_MATCH != 3 - Call UPDATE_HASH() MIN_MATCH-3 more times -#endif - /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not - * matter since it will be recomputed at next deflate call. - */ - } - } else { - /* No match, output a literal byte */ - Tracevv((stderr,"%c", s->window[s->strstart])); - _tr_tally_lit (s, s->window[s->strstart], bflush); - s->lookahead--; - s->strstart++; - } - if (bflush) FLUSH_BLOCK(s, 0); - } - s->insert = s->strstart < MIN_MATCH-1 ? 
s->strstart : MIN_MATCH-1; - if (flush == Z_FINISH) { - FLUSH_BLOCK(s, 1); - return finish_done; - } - if (s->last_lit) - FLUSH_BLOCK(s, 0); - return block_done; -} - -#ifndef FASTEST -/* =========================================================================== - * Same as above, but achieves better compression. We use a lazy - * evaluation for matches: a match is finally adopted only if there is - * no better match at the next window position. - */ -local block_state deflate_slow(s, flush) - deflate_state *s; - int flush; -{ - IPos hash_head; /* head of hash chain */ - int bflush; /* set if current block must be flushed */ - - /* Process the input block. */ - for (;;) { - /* Make sure that we always have enough lookahead, except - * at the end of the input file. We need MAX_MATCH bytes - * for the next match, plus MIN_MATCH bytes to insert the - * string following the next match. - */ - if (s->lookahead < MIN_LOOKAHEAD) { - fill_window(s); - if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { - return need_more; - } - if (s->lookahead == 0) break; /* flush the current block */ - } - - /* Insert the string window[strstart .. strstart+2] in the - * dictionary, and set hash_head to the head of the hash chain: - */ - hash_head = NIL; - if (s->lookahead >= MIN_MATCH) { - INSERT_STRING(s, s->strstart, hash_head); - } - - /* Find the longest match, discarding those <= prev_length. - */ - s->prev_length = s->match_length, s->prev_match = s->match_start; - s->match_length = MIN_MATCH-1; - - if (hash_head != NIL && s->prev_length < s->max_lazy_match && - s->strstart - hash_head <= MAX_DIST(s)) { - /* To simplify the code, we prevent matches with the string - * of window index 0 (in particular we have to avoid a match - * of the string with itself at the start of the input file). - */ - s->match_length = longest_match (s, hash_head); - /* longest_match() sets match_start */ - - if (s->match_length <= 5 && (s->strategy == Z_FILTERED -#if TOO_FAR <= 32767 - || (s->match_length == MIN_MATCH && - s->strstart - s->match_start > TOO_FAR) -#endif - )) { - - /* If prev_match is also MIN_MATCH, match_start is garbage - * but we will ignore the current match anyway. - */ - s->match_length = MIN_MATCH-1; - } - } - /* If there was a match at the previous step and the current - * match is not better, output the previous match: - */ - if (s->prev_length >= MIN_MATCH && s->match_length <= s->prev_length) { - uInt max_insert = s->strstart + s->lookahead - MIN_MATCH; - /* Do not insert strings in hash table beyond this. */ - - check_match(s, s->strstart-1, s->prev_match, s->prev_length); - - _tr_tally_dist(s, s->strstart -1 - s->prev_match, - s->prev_length - MIN_MATCH, bflush); - - /* Insert in hash table all strings up to the end of the match. - * strstart-1 and strstart are already inserted. If there is not - * enough lookahead, the last two strings are not inserted in - * the hash table. - */ - s->lookahead -= s->prev_length-1; - s->prev_length -= 2; - do { - if (++s->strstart <= max_insert) { - INSERT_STRING(s, s->strstart, hash_head); - } - } while (--s->prev_length != 0); - s->match_available = 0; - s->match_length = MIN_MATCH-1; - s->strstart++; - - if (bflush) FLUSH_BLOCK(s, 0); - - } else if (s->match_available) { - /* If there was no match at the previous position, output a - * single literal. If there was a match but the current match - * is longer, truncate the previous match to a single literal. 
- */ - Tracevv((stderr,"%c", s->window[s->strstart-1])); - _tr_tally_lit(s, s->window[s->strstart-1], bflush); - if (bflush) { - FLUSH_BLOCK_ONLY(s, 0); - } - s->strstart++; - s->lookahead--; - if (s->strm->avail_out == 0) return need_more; - } else { - /* There is no previous match to compare with, wait for - * the next step to decide. - */ - s->match_available = 1; - s->strstart++; - s->lookahead--; - } - } - Assert (flush != Z_NO_FLUSH, "no flush?"); - if (s->match_available) { - Tracevv((stderr,"%c", s->window[s->strstart-1])); - _tr_tally_lit(s, s->window[s->strstart-1], bflush); - s->match_available = 0; - } - s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; - if (flush == Z_FINISH) { - FLUSH_BLOCK(s, 1); - return finish_done; - } - if (s->last_lit) - FLUSH_BLOCK(s, 0); - return block_done; -} -#endif /* FASTEST */ - -/* =========================================================================== - * For Z_RLE, simply look for runs of bytes, generate matches only of distance - * one. Do not maintain a hash table. (It will be regenerated if this run of - * deflate switches away from Z_RLE.) - */ -local block_state deflate_rle(s, flush) - deflate_state *s; - int flush; -{ - int bflush; /* set if current block must be flushed */ - uInt prev; /* byte at distance one to match */ - Bytef *scan, *strend; /* scan goes up to strend for length of run */ - - for (;;) { - /* Make sure that we always have enough lookahead, except - * at the end of the input file. We need MAX_MATCH bytes - * for the longest run, plus one for the unrolled loop. - */ - if (s->lookahead <= MAX_MATCH) { - fill_window(s); - if (s->lookahead <= MAX_MATCH && flush == Z_NO_FLUSH) { - return need_more; - } - if (s->lookahead == 0) break; /* flush the current block */ - } - - /* See how many times the previous byte repeats */ - s->match_length = 0; - if (s->lookahead >= MIN_MATCH && s->strstart > 0) { - scan = s->window + s->strstart - 1; - prev = *scan; - if (prev == *++scan && prev == *++scan && prev == *++scan) { - strend = s->window + s->strstart + MAX_MATCH; - do { - } while (prev == *++scan && prev == *++scan && - prev == *++scan && prev == *++scan && - prev == *++scan && prev == *++scan && - prev == *++scan && prev == *++scan && - scan < strend); - s->match_length = MAX_MATCH - (uInt)(strend - scan); - if (s->match_length > s->lookahead) - s->match_length = s->lookahead; - } - Assert(scan <= s->window+(uInt)(s->window_size-1), "wild scan"); - } - - /* Emit match if have run of MIN_MATCH or longer, else emit literal */ - if (s->match_length >= MIN_MATCH) { - check_match(s, s->strstart, s->strstart - 1, s->match_length); - - _tr_tally_dist(s, 1, s->match_length - MIN_MATCH, bflush); - - s->lookahead -= s->match_length; - s->strstart += s->match_length; - s->match_length = 0; - } else { - /* No match, output a literal byte */ - Tracevv((stderr,"%c", s->window[s->strstart])); - _tr_tally_lit (s, s->window[s->strstart], bflush); - s->lookahead--; - s->strstart++; - } - if (bflush) FLUSH_BLOCK(s, 0); - } - s->insert = 0; - if (flush == Z_FINISH) { - FLUSH_BLOCK(s, 1); - return finish_done; - } - if (s->last_lit) - FLUSH_BLOCK(s, 0); - return block_done; -} - -/* =========================================================================== - * For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table. - * (It will be regenerated if this run of deflate switches away from Huffman.) 
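The Z_RLE scan above only ever looks one byte back, so a "match" is simply the repeat count of the previous byte, capped at MAX_MATCH (258). A simplified sketch; the caller must guarantee pos > 0 and `avail` readable bytes at buf + pos:

    unsigned run_length(const unsigned char *buf, unsigned pos, unsigned avail)
    {
        unsigned max = avail < 258 ? avail : 258;
        unsigned prev = buf[pos - 1];      /* byte at distance one */
        unsigned n = 0;
        while (n < max && buf[pos + n] == prev)
            n++;
        return n;                          /* >= 3 is emitted as a match */
    }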
- */ -local block_state deflate_huff(s, flush) - deflate_state *s; - int flush; -{ - int bflush; /* set if current block must be flushed */ - - for (;;) { - /* Make sure that we have a literal to write. */ - if (s->lookahead == 0) { - fill_window(s); - if (s->lookahead == 0) { - if (flush == Z_NO_FLUSH) - return need_more; - break; /* flush the current block */ - } - } - - /* Output a literal byte */ - s->match_length = 0; - Tracevv((stderr,"%c", s->window[s->strstart])); - _tr_tally_lit (s, s->window[s->strstart], bflush); - s->lookahead--; - s->strstart++; - if (bflush) FLUSH_BLOCK(s, 0); - } - s->insert = 0; - if (flush == Z_FINISH) { - FLUSH_BLOCK(s, 1); - return finish_done; - } - if (s->last_lit) - FLUSH_BLOCK(s, 0); - return block_done; -} diff --git a/base/poco/Foundation/src/deflate.h b/base/poco/Foundation/src/deflate.h deleted file mode 100644 index 5dd6e41bb29..00000000000 --- a/base/poco/Foundation/src/deflate.h +++ /dev/null @@ -1,355 +0,0 @@ -/* deflate.h -- internal compression state - * Copyright (C) 1995-2016 Jean-loup Gailly - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -/* @(#) $Id$ */ - -#ifndef DEFLATE_H -#define DEFLATE_H - -#include "zutil.h" - -/* define NO_GZIP when compiling if you want to disable gzip header and - trailer creation by deflate(). NO_GZIP would be used to avoid linking in - the crc code when it is not needed. For shared libraries, gzip encoding - should be left enabled. */ -#ifndef NO_GZIP -# define GZIP -#endif - -/* =========================================================================== - * Internal compression state. - */ - -#define LENGTH_CODES 29 -/* number of length codes, not counting the special END_BLOCK code */ - -#define LITERALS 256 -/* number of literal bytes 0..255 */ - -#define L_CODES (LITERALS + 1 + LENGTH_CODES) -/* number of Literal or Length codes, including the END_BLOCK code */ - -#define D_CODES 30 -/* number of distance codes */ - -#define BL_CODES 19 -/* number of codes used to transfer the bit lengths */ - -#define HEAP_SIZE (2 * L_CODES + 1) -/* maximum heap size */ - -#define MAX_BITS 15 -/* All codes must not exceed MAX_BITS bits */ - -#define Buf_size 16 -/* size of bit buffer in bi_buf */ - -#define INIT_STATE 42 /* zlib header -> BUSY_STATE */ -#ifdef GZIP -# define GZIP_STATE 57 /* gzip header -> BUSY_STATE | EXTRA_STATE */ -#endif -#define EXTRA_STATE 69 /* gzip extra block -> NAME_STATE */ -#define NAME_STATE 73 /* gzip file name -> COMMENT_STATE */ -#define COMMENT_STATE 91 /* gzip comment -> HCRC_STATE */ -#define HCRC_STATE 103 /* gzip header CRC -> BUSY_STATE */ -#define BUSY_STATE 113 /* deflate -> FINISH_STATE */ -#define FINISH_STATE 666 /* stream complete */ -/* Stream status */ - - -/* Data structure describing a single value and its code string. 
*/ -typedef struct ct_data_s -{ - union - { - ush freq; /* frequency count */ - ush code; /* bit string */ - } fc; - union - { - ush dad; /* father node in Huffman tree */ - ush len; /* length of bit string */ - } dl; -} FAR ct_data; - -#define Freq fc.freq -#define Code fc.code -#define Dad dl.dad -#define Len dl.len - -typedef struct static_tree_desc_s static_tree_desc; - -typedef struct tree_desc_s -{ - ct_data * dyn_tree; /* the dynamic tree */ - int max_code; /* largest code with non zero frequency */ - const static_tree_desc * stat_desc; /* the corresponding static tree */ -} FAR tree_desc; - -typedef ush Pos; -typedef Pos FAR Posf; -typedef unsigned IPos; - -/* A Pos is an index in the character window. We use short instead of int to - * save space in the various tables. IPos is used only for parameter passing. - */ - -typedef struct internal_state -{ - z_streamp strm; /* pointer back to this zlib stream */ - int status; /* as the name implies */ - Bytef * pending_buf; /* output still pending */ - ulg pending_buf_size; /* size of pending_buf */ - Bytef * pending_out; /* next pending byte to output to the stream */ - ulg pending; /* nb of bytes in the pending buffer */ - int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ - gz_headerp gzhead; /* gzip header information to write */ - ulg gzindex; /* where in extra, name, or comment */ - Byte method; /* can only be DEFLATED */ - int last_flush; /* value of flush param for previous deflate call */ - - /* used by deflate.c: */ - - uInt w_size; /* LZ77 window size (32K by default) */ - uInt w_bits; /* log2(w_size) (8..16) */ - uInt w_mask; /* w_size - 1 */ - - Bytef * window; - /* Sliding window. Input bytes are read into the second half of the window, - * and move to the first half later to keep a dictionary of at least wSize - * bytes. With this organization, matches are limited to a distance of - * wSize-MAX_MATCH bytes, but this ensures that IO is always - * performed with a length multiple of the block size. Also, it limits - * the window size to 64K, which is quite useful on MS-DOS. - * To do: use the user input buffer as sliding window. - */ - - ulg window_size; - /* Actual size of window: 2*wSize, except when the user input buffer - * is directly used as sliding window. - */ - - Posf * prev; - /* Link to older string with same hash index. To limit the size of this - * array to 64K, this link is maintained only for the last 32K strings. - * An index in this array is thus a window index modulo 32K. - */ - - Posf * head; /* Heads of the hash chains or NIL. */ - - uInt ins_h; /* hash index of string to be inserted */ - uInt hash_size; /* number of elements in hash table */ - uInt hash_bits; /* log2(hash_size) */ - uInt hash_mask; /* hash_size-1 */ - - uInt hash_shift; - /* Number of bits by which ins_h must be shifted at each input - * step. It must be such that after MIN_MATCH steps, the oldest - * byte no longer takes part in the hash key, that is: - * hash_shift * MIN_MATCH >= hash_bits - */ - - long block_start; - /* Window position at the beginning of the current output block. Gets - * negative when the window is moved backwards. - */ - - uInt match_length; /* length of best match */ - IPos prev_match; /* previous match */ - int match_available; /* set if previous match exists */ - uInt strstart; /* start of string to insert */ - uInt match_start; /* start of matching string */ - uInt lookahead; /* number of valid bytes ahead in window */ - - uInt prev_length; - /* Length of the best match at previous step. 
Matches not greater than this - * are discarded. This is used in the lazy match evaluation. - */ - - uInt max_chain_length; - /* To speed up deflation, hash chains are never searched beyond this - * length. A higher limit improves compression ratio but degrades the - * speed. - */ - - uInt max_lazy_match; - /* Attempt to find a better match only when the current match is strictly - * smaller than this value. This mechanism is used only for compression - * levels >= 4. - */ -#define max_insert_length max_lazy_match - /* Insert new strings in the hash table only if the match length is not - * greater than this length. This saves time but degrades compression. - * max_insert_length is used only for compression levels <= 3. - */ - - int level; /* compression level (1..9) */ - int strategy; /* favor or force Huffman coding*/ - - uInt good_match; - /* Use a faster search when the previous match is longer than this */ - - int nice_match; /* Stop searching when current match exceeds this */ - - /* used by trees.c: */ - /* Didn't use ct_data typedef below to suppress compiler warning */ - struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ - struct ct_data_s dyn_dtree[2 * D_CODES + 1]; /* distance tree */ - struct ct_data_s bl_tree[2 * BL_CODES + 1]; /* Huffman tree for bit lengths */ - - struct tree_desc_s l_desc; /* desc. for literal tree */ - struct tree_desc_s d_desc; /* desc. for distance tree */ - struct tree_desc_s bl_desc; /* desc. for bit length tree */ - - ush bl_count[MAX_BITS + 1]; - /* number of codes at each bit length for an optimal tree */ - - int heap[2 * L_CODES + 1]; /* heap used to build the Huffman trees */ - int heap_len; /* number of elements in the heap */ - int heap_max; /* element of largest frequency */ - /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. - * The same heap array is used to build all trees. - */ - - uch depth[2 * L_CODES + 1]; - /* Depth of each subtree used as tie breaker for trees of equal frequency - */ - - uchf * l_buf; /* buffer for literals or lengths */ - - uInt lit_bufsize; - /* Size of match buffer for literals/lengths. There are 4 reasons for - * limiting lit_bufsize to 64K: - * - frequencies can be kept in 16 bit counters - * - if compression is not successful for the first block, all input - * data is still in the window so we can still emit a stored block even - * when input comes from standard input. (This can also be done for - * all blocks if lit_bufsize is not greater than 32K.) - * - if compression is not successful for a file smaller than 64K, we can - * even emit a stored file instead of a stored block (saving 5 bytes). - * This is applicable only for zip (not gzip or zlib). - * - creating new Huffman trees less frequently may not provide fast - * adaptation to changes in the input data statistics. (Take for - * example a binary file with poorly compressible code followed by - * a highly compressible string table.) Smaller buffer sizes give - * fast adaptation but have of course the overhead of transmitting - * trees more frequently. - * - I can't count above 4 - */ - - uInt last_lit; /* running index in l_buf */ - - ushf * d_buf; - /* Buffer for distances. To simplify the code, d_buf and l_buf have - * the same number of elements. To use different lengths, an extra flag - * array would be necessary. 
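A standalone sketch of the parallel d_buf/l_buf scheme described above: entry i is a literal when the distance slot is zero, otherwise a (distance, length - MIN_MATCH) pair. Types and sizes are illustrative, not zlib's:

    typedef struct {
        unsigned short d_buf[16384];  /* 0 = literal, else match distance */
        unsigned char  l_buf[16384];  /* literal byte, or length - MIN_MATCH */
        unsigned       last_lit;      /* running index into both arrays */
    } tally_buf;

    static void tally_lit(tally_buf *t, unsigned char c)
    {
        t->d_buf[t->last_lit] = 0;
        t->l_buf[t->last_lit++] = c;
    }

    static void tally_match(tally_buf *t, unsigned dist, unsigned match_len)
    {
        t->d_buf[t->last_lit] = (unsigned short)dist;
        t->l_buf[t->last_lit++] = (unsigned char)(match_len - 3 /* MIN_MATCH */);
    }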
- */ - - ulg opt_len; /* bit length of current block with optimal trees */ - ulg static_len; /* bit length of current block with static trees */ - uInt matches; /* number of string matches in current block */ - uInt insert; /* bytes at end of window left to insert */ - -#ifdef ZLIB_DEBUG - ulg compressed_len; /* total bit length of compressed file mod 2^32 */ - ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ -#endif - - ush bi_buf; - /* Output buffer. bits are inserted starting at the bottom (least - * significant bits). - */ - int bi_valid; - /* Number of valid bits in bi_buf. All bits above the last valid bit - * are always zero. - */ - - ulg high_water; - /* High water mark offset in window for initialized bytes -- bytes above - * this are set to zero in order to avoid memory check warnings when - * longest match routines access bytes past the input. This is then - * updated to the new high water mark. - */ - -} FAR deflate_state; - -/* Output a byte on the stream. - * IN assertion: there is enough room in pending_buf. - */ -#define put_byte(s, c) \ - { \ - s->pending_buf[s->pending++] = (Bytef)(c); \ - } - - -#define MIN_LOOKAHEAD (MAX_MATCH + MIN_MATCH + 1) -/* Minimum amount of lookahead, except at the end of the input file. - * See deflate.c for comments about the MIN_MATCH+1. - */ - -#define MAX_DIST(s) ((s)->w_size - MIN_LOOKAHEAD) -/* In order to simplify the code, particularly on 16 bit machines, match - * distances are limited to MAX_DIST instead of WSIZE. - */ - -#define WIN_INIT MAX_MATCH -/* Number of bytes after end of data in window to initialize in order to avoid - memory checker errors from longest match routines */ - -/* in trees.c */ -void ZLIB_INTERNAL _tr_init OF((deflate_state * s)); -int ZLIB_INTERNAL _tr_tally OF((deflate_state * s, unsigned dist, unsigned lc)); -void ZLIB_INTERNAL _tr_flush_block OF((deflate_state * s, charf * buf, ulg stored_len, int last)); -void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state * s)); -void ZLIB_INTERNAL _tr_align OF((deflate_state * s)); -void ZLIB_INTERNAL _tr_stored_block OF((deflate_state * s, charf * buf, ulg stored_len, int last)); - -#define d_code(dist) ((dist) < 256 ? _dist_code[dist] : _dist_code[256 + ((dist) >> 7)]) -/* Mapping from a distance to a distance code. dist is the distance - 1 and - * must not have side effects. _dist_code[256] and _dist_code[257] are never - * used. 
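Worked numbers for the MIN_LOOKAHEAD and MAX_DIST macros above with the default 32K window, as a quick sanity check:

    #include <assert.h>

    int main(void)
    {
        unsigned w_size        = 32768u;          /* windowBits = 15 */
        unsigned min_lookahead = 258u + 3u + 1u;  /* MAX_MATCH + MIN_MATCH + 1 */
        assert(w_size - min_lookahead == 32506u); /* MAX_DIST(s) */
        return 0;
    }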
- */ - -#ifndef ZLIB_DEBUG -/* Inline versions of _tr_tally for speed: */ - -# if defined(GEN_TREES_H) || !defined(STDC) -extern uch ZLIB_INTERNAL _length_code[]; -extern uch ZLIB_INTERNAL _dist_code[]; -# else -extern const uch ZLIB_INTERNAL _length_code[]; -extern const uch ZLIB_INTERNAL _dist_code[]; -# endif - -# define _tr_tally_lit(s, c, flush) \ - { \ - uch cc = (c); \ - s->d_buf[s->last_lit] = 0; \ - s->l_buf[s->last_lit++] = cc; \ - s->dyn_ltree[cc].Freq++; \ - flush = (s->last_lit == s->lit_bufsize - 1); \ - } -# define _tr_tally_dist(s, distance, length, flush) \ - { \ - uch len = (uch)(length); \ - ush dist = (ush)(distance); \ - s->d_buf[s->last_lit] = dist; \ - s->l_buf[s->last_lit++] = len; \ - dist--; \ - s->dyn_ltree[_length_code[len] + LITERALS + 1].Freq++; \ - s->dyn_dtree[d_code(dist)].Freq++; \ - flush = (s->last_lit == s->lit_bufsize - 1); \ - } -#else -# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) -# define _tr_tally_dist(s, distance, length, flush) flush = _tr_tally(s, distance, length) -#endif - -#endif /* DEFLATE_H */ diff --git a/base/poco/Foundation/src/diy-fp.cc b/base/poco/Foundation/src/diy-fp.cc deleted file mode 100644 index ddd1891b168..00000000000 --- a/base/poco/Foundation/src/diy-fp.cc +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - -#include "diy-fp.h" -#include "utils.h" - -namespace double_conversion { - -void DiyFp::Multiply(const DiyFp& other) { - // Simply "emulates" a 128 bit multiplication. - // However: the resulting number only contains 64 bits. The least - // significant 64 bits are only used for rounding the most significant 64 - // bits. - const uint64_t kM32 = 0xFFFFFFFFU; - uint64_t a = f_ >> 32; - uint64_t b = f_ & kM32; - uint64_t c = other.f_ >> 32; - uint64_t d = other.f_ & kM32; - uint64_t ac = a * c; - uint64_t bc = b * c; - uint64_t ad = a * d; - uint64_t bd = b * d; - uint64_t tmp = (bd >> 32) + (ad & kM32) + (bc & kM32); - // By adding 1U << 31 to tmp we round the final result. 
- // Halfway cases will be rounded up. - tmp += 1U << 31; - uint64_t result_f = ac + (ad >> 32) + (bc >> 32) + (tmp >> 32); - e_ += other.e_ + 64; - f_ = result_f; -} - -} // namespace double_conversion diff --git a/base/poco/Foundation/src/diy-fp.h b/base/poco/Foundation/src/diy-fp.h deleted file mode 100644 index 03581bc16ae..00000000000 --- a/base/poco/Foundation/src/diy-fp.h +++ /dev/null @@ -1,127 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_DIY_FP_H_ -#define DOUBLE_CONVERSION_DIY_FP_H_ - -#include "utils.h" - -namespace double_conversion -{ - -// This "Do It Yourself Floating Point" class implements a floating-point number -// with a uint64 significand and an int exponent. Normalized DiyFp numbers will -// have the most significant bit of the significand set. -// Multiplication and Subtraction do not normalize their results. -// DiyFp are not designed to contain special doubles (NaN and Infinity). -class DiyFp -{ -public: - static const int kSignificandSize = 64; - - DiyFp() : f_(0), e_(0) { } - DiyFp(uint64_t f, int e) : f_(f), e_(e) { } - - // this = this - other. - // The exponents of both numbers must be the same and the significand of this - // must be bigger than the significand of other. - // The result will not be normalized. - void Subtract(const DiyFp & other) - { - ASSERT(e_ == other.e_); - ASSERT(f_ >= other.f_); - f_ -= other.f_; - } - - // Returns a - b. - // The exponents of both numbers must be the same and this must be bigger - // than other. The result will not be normalized. - static DiyFp Minus(const DiyFp & a, const DiyFp & b) - { - DiyFp result = a; - result.Subtract(b); - return result; - } - - - // this = this * other.
- void Multiply(const DiyFp & other); - - // returns a * b; - static DiyFp Times(const DiyFp & a, const DiyFp & b) - { - DiyFp result = a; - result.Multiply(b); - return result; - } - - void Normalize() - { - ASSERT(f_ != 0); - uint64_t f = f_; - int e = e_; - - // This method is mainly called for normalizing boundaries. In general - // boundaries need to be shifted by 10 bits. We thus optimize for this case. - const uint64_t k10MSBits = UINT64_2PART_C(0xFFC00000, 00000000); - while ((f & k10MSBits) == 0) - { - f <<= 10; - e -= 10; - } - while ((f & kUint64MSB) == 0) - { - f <<= 1; - e--; - } - f_ = f; - e_ = e; - } - - static DiyFp Normalize(const DiyFp & a) - { - DiyFp result = a; - result.Normalize(); - return result; - } - - uint64_t f() const { return f_; } - int e() const { return e_; } - - void set_f(uint64_t new_value) { f_ = new_value; } - void set_e(int new_value) { e_ = new_value; } - -private: - static const uint64_t kUint64MSB = UINT64_2PART_C(0x80000000, 00000000); - - uint64_t f_; - int e_; -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_DIY_FP_H_ diff --git a/base/poco/Foundation/src/double-conversion.cc b/base/poco/Foundation/src/double-conversion.cc deleted file mode 100644 index 39ad2461e9b..00000000000 --- a/base/poco/Foundation/src/double-conversion.cc +++ /dev/null @@ -1,911 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
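DiyFp::Multiply above assembles a 64x64-bit product from four 32x32 partial products and keeps only the high 64 bits, with the 1U << 31 addend rounding on bit 63 of the full product. A minimal self-check of that identity, assuming a compiler with the nonstandard __int128 extension (the helper name and test values here are illustrative, not library code):

    #include <cassert>
    #include <cstdint>

    // Restates DiyFp::Multiply's significand arithmetic standalone.
    static uint64_t MultiplySignificands(uint64_t f1, uint64_t f2)
    {
        const uint64_t kM32 = 0xFFFFFFFFU;
        uint64_t a = f1 >> 32, b = f1 & kM32;
        uint64_t c = f2 >> 32, d = f2 & kM32;
        uint64_t ac = a * c, bc = b * c, ad = a * d, bd = b * d;
        uint64_t tmp = (bd >> 32) + (ad & kM32) + (bc & kM32);
        tmp += 1U << 31; // round the discarded low half; halfway cases round up
        return ac + (ad >> 32) + (bc >> 32) + (tmp >> 32);
    }

    int main()
    {
        uint64_t f1 = 0x9A6E38F26BC19274ULL, f2 = 0xC4F0AEDD982DB135ULL; // arbitrary
        unsigned __int128 p = static_cast<unsigned __int128>(f1) * f2;
        // High 64 bits of the exact product, rounded up when bit 63 of the low half is set.
        uint64_t expected = static_cast<uint64_t>(p >> 64)
            + ((static_cast<uint64_t>(p) >> 63) & 1);
        assert(MultiplySignificands(f1, f2) == expected);
        return 0;
    }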
- -#include <limits.h> -#include <math.h> - -#include "double-conversion.h" - -#include "bignum-dtoa.h" -#include "fast-dtoa.h" -#include "fixed-dtoa.h" -#include "ieee.h" -#include "strtod.h" -#include "utils.h" - -namespace double_conversion { - -const DoubleToStringConverter& DoubleToStringConverter::EcmaScriptConverter() { - int flags = UNIQUE_ZERO | EMIT_POSITIVE_EXPONENT_SIGN; - static DoubleToStringConverter converter(flags, - "Infinity", - "NaN", - 'e', - -6, 21, - 6, 0); - return converter; -} - - -bool DoubleToStringConverter::HandleSpecialValues( - double value, - StringBuilder* result_builder) const { - Double double_inspect(value); - if (double_inspect.IsInfinite()) { - if (infinity_symbol_ == NULL) return false; - if (value < 0) { - result_builder->AddCharacter('-'); - } - result_builder->AddString(infinity_symbol_); - return true; - } - if (double_inspect.IsNan()) { - if (nan_symbol_ == NULL) return false; - result_builder->AddString(nan_symbol_); - return true; - } - return false; -} - - -void DoubleToStringConverter::CreateExponentialRepresentation( - const char* decimal_digits, - int length, - int exponent, - StringBuilder* result_builder) const { - ASSERT(length != 0); - result_builder->AddCharacter(decimal_digits[0]); - if (length != 1) { - result_builder->AddCharacter('.'); - result_builder->AddSubstring(&decimal_digits[1], length-1); - } - result_builder->AddCharacter(exponent_character_); - if (exponent < 0) { - result_builder->AddCharacter('-'); - exponent = -exponent; - } else { - if ((flags_ & EMIT_POSITIVE_EXPONENT_SIGN) != 0) { - result_builder->AddCharacter('+'); - } - } - if (exponent == 0) { - result_builder->AddCharacter('0'); - return; - } - ASSERT(exponent < 1e4); - const int kMaxExponentLength = 5; - char buffer[kMaxExponentLength + 1]; - buffer[kMaxExponentLength] = '\0'; - int first_char_pos = kMaxExponentLength; - while (exponent > 0) { - buffer[--first_char_pos] = '0' + (exponent % 10); - exponent /= 10; - } - result_builder->AddSubstring(&buffer[first_char_pos], - kMaxExponentLength - first_char_pos); -} - - -void DoubleToStringConverter::CreateDecimalRepresentation( - const char* decimal_digits, - int length, - int decimal_point, - int digits_after_point, - StringBuilder* result_builder) const { - // Create a representation that is padded with zeros if needed. - if (decimal_point <= 0) { - // "0.00000decimal_rep".
- result_builder->AddCharacter('0'); - if (digits_after_point > 0) { - result_builder->AddCharacter('.'); - result_builder->AddPadding('0', -decimal_point); - ASSERT(length <= digits_after_point - (-decimal_point)); - result_builder->AddSubstring(decimal_digits, length); - int remaining_digits = digits_after_point - (-decimal_point) - length; - result_builder->AddPadding('0', remaining_digits); - } - } else if (decimal_point >= length) { - // "decimal_rep0000.00000" or "decimal_rep.0000" - result_builder->AddSubstring(decimal_digits, length); - result_builder->AddPadding('0', decimal_point - length); - if (digits_after_point > 0) { - result_builder->AddCharacter('.'); - result_builder->AddPadding('0', digits_after_point); - } - } else { - // "decima.l_rep000" - ASSERT(digits_after_point > 0); - result_builder->AddSubstring(decimal_digits, decimal_point); - result_builder->AddCharacter('.'); - ASSERT(length - decimal_point <= digits_after_point); - result_builder->AddSubstring(&decimal_digits[decimal_point], - length - decimal_point); - int remaining_digits = digits_after_point - (length - decimal_point); - result_builder->AddPadding('0', remaining_digits); - } - if (digits_after_point == 0) { - if ((flags_ & EMIT_TRAILING_DECIMAL_POINT) != 0) { - result_builder->AddCharacter('.'); - } - if ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) { - result_builder->AddCharacter('0'); - } - } -} - - -bool DoubleToStringConverter::ToShortestIeeeNumber( - double value, - StringBuilder* result_builder, - DoubleToStringConverter::DtoaMode mode) const { - ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE); - if (Double(value).IsSpecial()) { - return HandleSpecialValues(value, result_builder); - } - - int decimal_point; - bool sign; - const int kDecimalRepCapacity = kBase10MaximalLength + 1; - char decimal_rep[kDecimalRepCapacity]; - int decimal_rep_length; - - DoubleToAscii(value, mode, 0, decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - - bool unique_zero = (flags_ & UNIQUE_ZERO) != 0; - if (sign && (value != 0.0 || !unique_zero)) { - result_builder->AddCharacter('-'); - } - - int exponent = decimal_point - 1; - if ((decimal_in_shortest_low_ <= exponent) && - (exponent < decimal_in_shortest_high_)) { - CreateDecimalRepresentation(decimal_rep, decimal_rep_length, - decimal_point, - Max(0, decimal_rep_length - decimal_point), - result_builder); - } else { - CreateExponentialRepresentation(decimal_rep, decimal_rep_length, exponent, - result_builder); - } - return true; -} - - -bool DoubleToStringConverter::ToFixed(double value, - int requested_digits, - StringBuilder* result_builder) const { - ASSERT(kMaxFixedDigitsBeforePoint == 60); - const double kFirstNonFixed = 1e60; - - if (Double(value).IsSpecial()) { - return HandleSpecialValues(value, result_builder); - } - - if (requested_digits > kMaxFixedDigitsAfterPoint) return false; - if (value >= kFirstNonFixed || value <= -kFirstNonFixed) return false; - - // Find a sufficiently precise decimal representation of n. - int decimal_point; - bool sign; - // Add space for the '\0' byte. 
- const int kDecimalRepCapacity = - kMaxFixedDigitsBeforePoint + kMaxFixedDigitsAfterPoint + 1; - char decimal_rep[kDecimalRepCapacity]; - int decimal_rep_length; - DoubleToAscii(value, FIXED, requested_digits, - decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - - bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); - if (sign && (value != 0.0 || !unique_zero)) { - result_builder->AddCharacter('-'); - } - - CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point, - requested_digits, result_builder); - return true; -} - - -bool DoubleToStringConverter::ToExponential( - double value, - int requested_digits, - StringBuilder* result_builder) const { - if (Double(value).IsSpecial()) { - return HandleSpecialValues(value, result_builder); - } - - if (requested_digits < -1) return false; - if (requested_digits > kMaxExponentialDigits) return false; - - int decimal_point; - bool sign; - // Add space for digit before the decimal point and the '\0' character. - const int kDecimalRepCapacity = kMaxExponentialDigits + 2; - ASSERT(kDecimalRepCapacity > kBase10MaximalLength); - char decimal_rep[kDecimalRepCapacity]; - int decimal_rep_length; - - if (requested_digits == -1) { - DoubleToAscii(value, SHORTEST, 0, - decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - } else { - DoubleToAscii(value, PRECISION, requested_digits + 1, - decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - ASSERT(decimal_rep_length <= requested_digits + 1); - - for (int i = decimal_rep_length; i < requested_digits + 1; ++i) { - decimal_rep[i] = '0'; - } - decimal_rep_length = requested_digits + 1; - } - - bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); - if (sign && (value != 0.0 || !unique_zero)) { - result_builder->AddCharacter('-'); - } - - int exponent = decimal_point - 1; - CreateExponentialRepresentation(decimal_rep, - decimal_rep_length, - exponent, - result_builder); - return true; -} - - -bool DoubleToStringConverter::ToPrecision(double value, - int precision, - StringBuilder* result_builder) const { - if (Double(value).IsSpecial()) { - return HandleSpecialValues(value, result_builder); - } - - if (precision < kMinPrecisionDigits || precision > kMaxPrecisionDigits) { - return false; - } - - // Find a sufficiently precise decimal representation of n. - int decimal_point; - bool sign; - // Add one for the terminating null character. - const int kDecimalRepCapacity = kMaxPrecisionDigits + 1; - char decimal_rep[kDecimalRepCapacity]; - int decimal_rep_length; - - DoubleToAscii(value, PRECISION, precision, - decimal_rep, kDecimalRepCapacity, - &sign, &decimal_rep_length, &decimal_point); - ASSERT(decimal_rep_length <= precision); - - bool unique_zero = ((flags_ & UNIQUE_ZERO) != 0); - if (sign && (value != 0.0 || !unique_zero)) { - result_builder->AddCharacter('-'); - } - - // The exponent if we print the number as x.xxeyyy. That is with the - // decimal point after the first digit. - int exponent = decimal_point - 1; - - int extra_zero = ((flags_ & EMIT_TRAILING_ZERO_AFTER_POINT) != 0) ? 1 : 0; - if ((-decimal_point + 1 > max_leading_padding_zeroes_in_precision_mode_) || - (decimal_point - precision + extra_zero > - max_trailing_padding_zeroes_in_precision_mode_)) { - // Fill buffer to contain 'precision' digits. - // Usually the buffer is already at the correct length, but 'DoubleToAscii' - // is allowed to return less characters. 
- for (int i = decimal_rep_length; i < precision; ++i) { - decimal_rep[i] = '0'; - } - - CreateExponentialRepresentation(decimal_rep, - precision, - exponent, - result_builder); - } else { - CreateDecimalRepresentation(decimal_rep, decimal_rep_length, decimal_point, - Max(0, precision - decimal_point), - result_builder); - } - return true; -} - - -static BignumDtoaMode DtoaToBignumDtoaMode( - DoubleToStringConverter::DtoaMode dtoa_mode) { - switch (dtoa_mode) { - case DoubleToStringConverter::SHORTEST: return BIGNUM_DTOA_SHORTEST; - case DoubleToStringConverter::SHORTEST_SINGLE: - return BIGNUM_DTOA_SHORTEST_SINGLE; - case DoubleToStringConverter::FIXED: return BIGNUM_DTOA_FIXED; - case DoubleToStringConverter::PRECISION: return BIGNUM_DTOA_PRECISION; - default: - UNREACHABLE(); - return BIGNUM_DTOA_SHORTEST; - } -} - - -void DoubleToStringConverter::DoubleToAscii(double v, - DtoaMode mode, - int requested_digits, - char* buffer, - int buffer_length, - bool* sign, - int* length, - int* point) { - Vector<char> vector(buffer, buffer_length); - ASSERT(!Double(v).IsSpecial()); - ASSERT(mode == SHORTEST || mode == SHORTEST_SINGLE || requested_digits >= 0); - - if (Double(v).Sign() < 0) { - *sign = true; - v = -v; - } else { - *sign = false; - } - - if (mode == PRECISION && requested_digits == 0) { - vector[0] = '\0'; - *length = 0; - return; - } - - if (v == 0) { - vector[0] = '0'; - vector[1] = '\0'; - *length = 1; - *point = 1; - return; - } - - bool fast_worked; - switch (mode) { - case SHORTEST: - fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST, 0, vector, length, point); - break; - case SHORTEST_SINGLE: - fast_worked = FastDtoa(v, FAST_DTOA_SHORTEST_SINGLE, 0, - vector, length, point); - break; - case FIXED: - fast_worked = FastFixedDtoa(v, requested_digits, vector, length, point); - break; - case PRECISION: - fast_worked = FastDtoa(v, FAST_DTOA_PRECISION, requested_digits, - vector, length, point); - break; - default: - fast_worked = false; - UNREACHABLE(); - } - if (fast_worked) return; - - // If the fast dtoa didn't succeed use the slower bignum version. - BignumDtoaMode bignum_mode = DtoaToBignumDtoaMode(mode); - BignumDtoa(v, bignum_mode, requested_digits, vector, length, point); - vector[*length] = '\0'; -} - - -// Consumes the given substring from the iterator. -// Returns false, if the substring does not match. -static bool ConsumeSubString(const char** current, - const char* end, - const char* substring) { - ASSERT(**current == *substring); - for (substring++; *substring != '\0'; substring++) { - ++*current; - if (*current == end || **current != *substring) return false; - } - ++*current; - return true; -} - - -// Maximum number of significant digits in decimal representation. -// The longest possible double in decimal representation is -// (2^53 - 1) * 2 ^ -1074 that is (2 ^ 53 - 1) * 5 ^ 1074 / 10 ^ 1074 -// (768 digits). If we parse a number whose first digits are equal to a -// mean of 2 adjacent doubles (that could have up to 769 digits) the result -// must be rounded to the bigger one unless the tail consists of zeros, so -// we don't need to preserve all the digits. -const int kMaxSignificantDigits = 772; - - -// Returns true if a non-space character was found and false if the end was reached.
-static inline bool AdvanceToNonspace(const char** current, const char* end) { - while (*current != end) { - if (**current != ' ') return true; - ++*current; - } - return false; -} - - -static bool isDigit(int x, int radix) { - return (x >= '0' && x <= '9' && x < '0' + radix) - || (radix > 10 && x >= 'a' && x < 'a' + radix - 10) - || (radix > 10 && x >= 'A' && x < 'A' + radix - 10); -} - - -static double SignedZero(bool sign) { - return sign ? -0.0 : 0.0; -} - - -// Returns true if 'c' is a decimal digit that is valid for the given radix. -// -// The function is small and could be inlined, but VS2012 emitted a warning -// because it constant-propagated the radix and concluded that the last -// condition was always true. By moving it into a separate function the -// compiler wouldn't warn anymore. -static bool IsDecimalDigitForRadix(int c, int radix) { - return '0' <= c && c <= '9' && (c - '0') < radix; -} - -// Returns true if 'c' is a character digit that is valid for the given radix. -// The 'a_character' should be 'a' or 'A'. -// -// The function is small and could be inlined, but VS2012 emitted a warning -// because it constant-propagated the radix and concluded that the first -// condition was always false. By moving it into a separate function the -// compiler wouldn't warn anymore. -static bool IsCharacterDigitForRadix(int c, int radix, char a_character) { - return radix > 10 && c >= a_character && c < a_character + radix - 10; -} - - -// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end. -template <int radix_log_2> -static double RadixStringToIeee(const char* current, - const char* end, - bool sign, - bool allow_trailing_junk, - double junk_string_value, - bool read_as_double, - const char** trailing_pointer) { - ASSERT(current != end); - - const int kDoubleSize = Double::kSignificandSize; - const int kSingleSize = Single::kSignificandSize; - const int kSignificandSize = read_as_double? kDoubleSize: kSingleSize; - - // Skip leading 0s. - while (*current == '0') { - ++current; - if (current == end) { - *trailing_pointer = end; - return SignedZero(sign); - } - } - - int64_t number = 0; - int exponent = 0; - const int radix = (1 << radix_log_2); - - do { - int digit; - if (IsDecimalDigitForRadix(*current, radix)) { - digit = static_cast<char>(*current) - '0'; - } else if (IsCharacterDigitForRadix(*current, radix, 'a')) { - digit = static_cast<char>(*current) - 'a' + 10; - } else if (IsCharacterDigitForRadix(*current, radix, 'A')) { - digit = static_cast<char>(*current) - 'A' + 10; - } else { - if (allow_trailing_junk || !AdvanceToNonspace(&current, end)) { - break; - } else { - return junk_string_value; - } - } - - number = number * radix + digit; - int overflow = static_cast<int>(number >> kSignificandSize); - if (overflow != 0) { - // Overflow occurred. Need to determine which direction to round the - // result. - int overflow_bits_count = 1; - while (overflow > 1) { - overflow_bits_count++; - overflow >>= 1; - } - - int dropped_bits_mask = ((1 << overflow_bits_count) - 1); - int dropped_bits = static_cast<int>(number) & dropped_bits_mask; - number >>= overflow_bits_count; - exponent = overflow_bits_count; - - bool zero_tail = true; - for (;;) { - ++current; - if (current == end || !isDigit(*current, radix)) break; - zero_tail = zero_tail && *current == '0'; - exponent += radix_log_2; - } - - if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) { - return junk_string_value; - } - - int middle_value = (1 << (overflow_bits_count - 1)); - if (dropped_bits > middle_value) { - number++; // Rounding up.
- } else if (dropped_bits == middle_value) { - // Round to even for consistency with decimals: the half-way case rounds - // up if the significant part is odd and down otherwise. - if ((number & 1) != 0 || !zero_tail) { - number++; // Rounding up. - } - } - - // Rounding up may cause overflow. - if ((number & ((int64_t)1 << kSignificandSize)) != 0) { - exponent++; - number >>= 1; - } - break; - } - ++current; - } while (current != end); - - ASSERT(number < ((int64_t)1 << kSignificandSize)); - ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number); - - *trailing_pointer = current; - - if (exponent == 0) { - if (sign) { - if (number == 0) return -0.0; - number = -number; - } - return static_cast<double>(number); - } - - ASSERT(number != 0); - return Double(DiyFp(number, exponent)).value(); -} - - -double StringToDoubleConverter::StringToIeee( - const char* input, - int length, - int* processed_characters_count, - bool read_as_double) const { - const char* current = input; - const char* end = input + length; - - *processed_characters_count = 0; - - const bool allow_trailing_junk = (flags_ & ALLOW_TRAILING_JUNK) != 0; - const bool allow_leading_spaces = (flags_ & ALLOW_LEADING_SPACES) != 0; - const bool allow_trailing_spaces = (flags_ & ALLOW_TRAILING_SPACES) != 0; - const bool allow_spaces_after_sign = (flags_ & ALLOW_SPACES_AFTER_SIGN) != 0; - - // To make sure that iterator dereferencing is valid the following - // convention is used: - // 1. Each '++current' statement is followed by a check for equality to 'end'. - // 2. If AdvanceToNonspace returned false then current == end. - // 3. If 'current' becomes equal to 'end' the function returns or goes to - // 'parsing_done'. - // 4. 'current' is not dereferenced after the 'parsing_done' label. - // 5. Code before 'parsing_done' may rely on 'current != end'. - if (current == end) return empty_string_value_; - - if (allow_leading_spaces || allow_trailing_spaces) { - if (!AdvanceToNonspace(&current, end)) { - *processed_characters_count = static_cast<int>(current - input); - return empty_string_value_; - } - if (!allow_leading_spaces && (input != current)) { - // No leading spaces allowed, but AdvanceToNonspace moved forward. - return junk_string_value_; - } - } - - // The longest form of simplified number is: "-.1eXXX\0". - const int kBufferSize = kMaxSignificantDigits + 10; - char buffer[kBufferSize]; // NOLINT: size is known at compile time. - int buffer_pos = 0; - - // Exponent will be adjusted if insignificant digits of the integer part - // or insignificant leading zeros of the fractional part are dropped. - int exponent = 0; - int significant_digits = 0; - int insignificant_digits = 0; - bool nonzero_digit_dropped = false; - - bool sign = false; - - if (*current == '+' || *current == '-') { - sign = (*current == '-'); - ++current; - const char* next_non_space = current; - // Skip following spaces (if allowed).
- if (!AdvanceToNonspace(&next_non_space, end)) return junk_string_value_; - if (!allow_spaces_after_sign && (current != next_non_space)) { - return junk_string_value_; - } - current = next_non_space; - } - - if (infinity_symbol_ != NULL) { - if (*current == infinity_symbol_[0]) { - if (!ConsumeSubString(&current, end, infinity_symbol_)) { - return junk_string_value_; - } - - if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { - return junk_string_value_; - } - if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) { - return junk_string_value_; - } - - ASSERT(buffer_pos == 0); - *processed_characters_count = static_cast<int>(current - input); - return sign ? -Double::Infinity() : Double::Infinity(); - } - } - - if (nan_symbol_ != NULL) { - if (*current == nan_symbol_[0]) { - if (!ConsumeSubString(&current, end, nan_symbol_)) { - return junk_string_value_; - } - - if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { - return junk_string_value_; - } - if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) { - return junk_string_value_; - } - - ASSERT(buffer_pos == 0); - *processed_characters_count = static_cast<int>(current - input); - return sign ? -Double::NaN() : Double::NaN(); - } - } - - bool leading_zero = false; - if (*current == '0') { - ++current; - if (current == end) { - *processed_characters_count = static_cast<int>(current - input); - return SignedZero(sign); - } - - leading_zero = true; - - // It could be a hexadecimal value. - if ((flags_ & ALLOW_HEX) && (*current == 'x' || *current == 'X')) { - ++current; - if (current == end || !isDigit(*current, 16)) { - return junk_string_value_; // "0x". - } - - const char* tail_pointer = NULL; - double result = RadixStringToIeee<4>(current, - end, - sign, - allow_trailing_junk, - junk_string_value_, - read_as_double, - &tail_pointer); - if (tail_pointer != NULL) { - if (allow_trailing_spaces) AdvanceToNonspace(&tail_pointer, end); - *processed_characters_count = static_cast<int>(tail_pointer - input); - } - return result; - } - - // Ignore leading zeros in the integer part. - while (*current == '0') { - ++current; - if (current == end) { - *processed_characters_count = static_cast<int>(current - input); - return SignedZero(sign); - } - } - } - - bool octal = leading_zero && (flags_ & ALLOW_OCTALS) != 0; - - // Copy significant digits of the integer part (if any) to the buffer. - while (*current >= '0' && *current <= '9') { - if (significant_digits < kMaxSignificantDigits) { - ASSERT(buffer_pos < kBufferSize); - buffer[buffer_pos++] = static_cast<char>(*current); - significant_digits++; - // Will later check if it's an octal in the buffer. - } else { - insignificant_digits++; // Move the digit into the exponential part. - nonzero_digit_dropped = nonzero_digit_dropped || *current != '0'; - } - octal = octal && *current < '8'; - ++current; - if (current == end) goto parsing_done; - } - - if (significant_digits == 0) { - octal = false; - } - - if (*current == '.') { - if (octal && !allow_trailing_junk) return junk_string_value_; - if (octal) goto parsing_done; - - ++current; - if (current == end) { - if (significant_digits == 0 && !leading_zero) { - return junk_string_value_; - } else { - goto parsing_done; - } - } - - if (significant_digits == 0) { - // octal = false; - // Integer part consists of 0 or is absent. Significant digits start after - // leading zeros (if any).
- while (*current == '0') { - ++current; - if (current == end) { - *processed_characters_count = static_cast<int>(current - input); - return SignedZero(sign); - } - exponent--; // Move this 0 into the exponent. - } - } - - // There is a fractional part. - // We don't emit a '.', but adjust the exponent instead. - while (*current >= '0' && *current <= '9') { - if (significant_digits < kMaxSignificantDigits) { - ASSERT(buffer_pos < kBufferSize); - buffer[buffer_pos++] = static_cast<char>(*current); - significant_digits++; - exponent--; - } else { - // Ignore insignificant digits in the fractional part. - nonzero_digit_dropped = nonzero_digit_dropped || *current != '0'; - } - ++current; - if (current == end) goto parsing_done; - } - } - - if (!leading_zero && exponent == 0 && significant_digits == 0) { - // If leading_zeros is true then the string contains zeros. - // If exponent < 0 then string was [+-]\.0*... - // If significant_digits != 0 the string is not equal to 0. - // Otherwise there are no digits in the string. - return junk_string_value_; - } - - // Parse exponential part. - if (*current == 'e' || *current == 'E') { - if (octal && !allow_trailing_junk) return junk_string_value_; - if (octal) goto parsing_done; - ++current; - if (current == end) { - if (allow_trailing_junk) { - goto parsing_done; - } else { - return junk_string_value_; - } - } - char sign = '+'; - if (*current == '+' || *current == '-') { - sign = static_cast<char>(*current); - ++current; - if (current == end) { - if (allow_trailing_junk) { - goto parsing_done; - } else { - return junk_string_value_; - } - } - } - - if (current == end || *current < '0' || *current > '9') { - if (allow_trailing_junk) { - goto parsing_done; - } else { - return junk_string_value_; - } - } - - const int max_exponent = INT_MAX / 2; - ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2); - int num = 0; - do { - // Check overflow. - int digit = *current - '0'; - if (num >= max_exponent / 10 - && !(num == max_exponent / 10 && digit <= max_exponent % 10)) { - num = max_exponent; - } else { - num = num * 10 + digit; - } - ++current; - } while (current != end && *current >= '0' && *current <= '9'); - - exponent += (sign == '-' ? -num : num); - } - - if (!(allow_trailing_spaces || allow_trailing_junk) && (current != end)) { - return junk_string_value_; - } - if (!allow_trailing_junk && AdvanceToNonspace(&current, end)) { - return junk_string_value_; - } - if (allow_trailing_spaces) { - AdvanceToNonspace(&current, end); - } - - parsing_done: - exponent += insignificant_digits; - - if (octal) { - double result; - const char* tail_pointer = NULL; - result = RadixStringToIeee<3>(buffer, - buffer + buffer_pos, - sign, - allow_trailing_junk, - junk_string_value_, - read_as_double, - &tail_pointer); - ASSERT(tail_pointer != NULL); - *processed_characters_count = static_cast<int>(current - input); - return result; - } - - if (nonzero_digit_dropped) { - buffer[buffer_pos++] = '1'; - exponent--; - } - - ASSERT(buffer_pos < kBufferSize); - buffer[buffer_pos] = '\0'; - - double converted; - if (read_as_double) { - converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent); - } else { - converted = Strtof(Vector<const char>(buffer, buffer_pos), exponent); - } - *processed_characters_count = static_cast<int>(current - input); - return sign?
-converted: converted; -} - -} // namespace double_conversion diff --git a/base/poco/Foundation/src/double-conversion.h b/base/poco/Foundation/src/double-conversion.h deleted file mode 100644 index 851049bf7f0..00000000000 --- a/base/poco/Foundation/src/double-conversion.h +++ /dev/null @@ -1,512 +0,0 @@ -// Copyright 2012 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ -#define DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ - -#include "utils.h" - -namespace double_conversion -{ - -class DoubleToStringConverter -{ -public: - // When calling ToFixed with a double > 10^kMaxFixedDigitsBeforePoint - // or a requested_digits parameter > kMaxFixedDigitsAfterPoint then the - // function returns false. - static const int kMaxFixedDigitsBeforePoint = 60; - static const int kMaxFixedDigitsAfterPoint = 60; - - // When calling ToExponential with a requested_digits - // parameter > kMaxExponentialDigits then the function returns false. - static const int kMaxExponentialDigits = 120; - - // When calling ToPrecision with a requested_digits - // parameter < kMinPrecisionDigits or requested_digits > kMaxPrecisionDigits - // then the function returns false. - static const int kMinPrecisionDigits = 1; - static const int kMaxPrecisionDigits = 120; - - enum Flags - { - NO_FLAGS = 0, - EMIT_POSITIVE_EXPONENT_SIGN = 1, - EMIT_TRAILING_DECIMAL_POINT = 2, - EMIT_TRAILING_ZERO_AFTER_POINT = 4, - UNIQUE_ZERO = 8 - }; - - // Flags should be a bit-or combination of the possible Flags-enum. - // - NO_FLAGS: no special flags. - // - EMIT_POSITIVE_EXPONENT_SIGN: when the number is converted into exponent - // form, emits a '+' for positive exponents. Example: 1.2e+2. - // - EMIT_TRAILING_DECIMAL_POINT: when the input number is an integer and is - // converted into decimal format then a trailing decimal point is appended. - // Example: 2345.0 is converted to "2345.". - // - EMIT_TRAILING_ZERO_AFTER_POINT: in addition to a trailing decimal point - // emits a trailing '0'-character. 
This flag requires the - // EMIT_TRAILING_DECIMAL_POINT flag. - // Example: 2345.0 is converted to "2345.0". - // - UNIQUE_ZERO: "-0.0" is converted to "0.0". - // - // Infinity symbol and nan_symbol provide the string representation for these - // special values. If the string is NULL and the special value is encountered - // then the conversion functions return false. - // - // The exponent_character is used in exponential representations. It is - // usually 'e' or 'E'. - // - // When converting to the shortest representation the converter will - // represent input numbers in decimal format if they are in the interval - // [10^decimal_in_shortest_low; 10^decimal_in_shortest_high[ - // (lower boundary included, greater boundary excluded). - // Example: with decimal_in_shortest_low = -6 and - // decimal_in_shortest_high = 21: - // ToShortest(0.000001) -> "0.000001" - // ToShortest(0.0000001) -> "1e-7" - // ToShortest(111111111111111111111.0) -> "111111111111111110000" - // ToShortest(100000000000000000000.0) -> "100000000000000000000" - // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21" - // - // When converting to precision mode the converter may add - // max_leading_padding_zeroes before returning the number in exponential - // format. - // Example with max_leading_padding_zeroes_in_precision_mode = 6. - // ToPrecision(0.0000012345, 2) -> "0.0000012" - // ToPrecision(0.00000012345, 2) -> "1.2e-7" - // Similarly the converter may add up to - // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid - // returning an exponential representation. A zero added by the - // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. - // Examples for max_trailing_padding_zeroes_in_precision_mode = 1: - // ToPrecision(230.0, 2) -> "230" - // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT. - // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT. - DoubleToStringConverter( - int flags, - const char * infinity_symbol, - const char * nan_symbol, - char exponent_character, - int decimal_in_shortest_low, - int decimal_in_shortest_high, - int max_leading_padding_zeroes_in_precision_mode, - int max_trailing_padding_zeroes_in_precision_mode) - : flags_(flags) - , infinity_symbol_(infinity_symbol) - , nan_symbol_(nan_symbol) - , exponent_character_(exponent_character) - , decimal_in_shortest_low_(decimal_in_shortest_low) - , decimal_in_shortest_high_(decimal_in_shortest_high) - , max_leading_padding_zeroes_in_precision_mode_(max_leading_padding_zeroes_in_precision_mode) - , max_trailing_padding_zeroes_in_precision_mode_(max_trailing_padding_zeroes_in_precision_mode) - { - // When 'trailing zero after the point' is set, then 'trailing point' - // must be set too. - ASSERT(((flags & EMIT_TRAILING_DECIMAL_POINT) != 0) || !((flags & EMIT_TRAILING_ZERO_AFTER_POINT) != 0)); - } - - // Returns a converter following the EcmaScript specification. - static const DoubleToStringConverter & EcmaScriptConverter(); - - // Computes the shortest string of digits that correctly represent the input - // number. Depending on decimal_in_shortest_low and decimal_in_shortest_high - // (see constructor) it then either returns a decimal representation, or an - // exponential representation.
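A hedged usage sketch of the constructor documented above, with the EcmaScript-style arguments from the examples; it assumes the StringBuilder helper declared in this library's utils.h:

    #include <cstdio>
    #include "double-conversion.h" // the header being deleted here
    #include "utils.h"             // double_conversion::StringBuilder

    int main()
    {
        using double_conversion::DoubleToStringConverter;
        using double_conversion::StringBuilder;

        int flags = DoubleToStringConverter::UNIQUE_ZERO
            | DoubleToStringConverter::EMIT_POSITIVE_EXPONENT_SIGN;
        DoubleToStringConverter conv(flags, "Infinity", "NaN", 'e',
                                     -6, 21, // decimal_in_shortest_low/high
                                     6, 0);  // precision-mode padding limits

        char buf[64];
        StringBuilder sb(buf, sizeof(buf));
        conv.ToShortest(0.0000001, &sb); // below 10^-6, so exponential: "1e-7"
        std::printf("%s\n", sb.Finalize());
        return 0;
    }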
- // Example with decimal_in_shortest_low = -6, - // decimal_in_shortest_high = 21, - // EMIT_POSITIVE_EXPONENT_SIGN activated, and - // EMIT_TRAILING_DECIMAL_POINT deactivated: - // ToShortest(0.000001) -> "0.000001" - // ToShortest(0.0000001) -> "1e-7" - // ToShortest(111111111111111111111.0) -> "111111111111111110000" - // ToShortest(100000000000000000000.0) -> "100000000000000000000" - // ToShortest(1111111111111111111111.0) -> "1.1111111111111111e+21" - // - // Note: the conversion may round the output if the returned string - // is accurate enough to uniquely identify the input-number. - // For example the most precise representation of the double 9e59 equals - // "899999999999999918767229449717619953810131273674690656206848", but - // the converter will return the shorter (but still correct) "9e59". - // - // Returns true if the conversion succeeds. The conversion always succeeds - // except when the input value is special and no infinity_symbol or - // nan_symbol has been given to the constructor. - bool ToShortest(double value, StringBuilder * result_builder) const { return ToShortestIeeeNumber(value, result_builder, SHORTEST); } - - // Same as ToShortest, but for single-precision floats. - bool ToShortestSingle(float value, StringBuilder * result_builder) const - { - return ToShortestIeeeNumber(value, result_builder, SHORTEST_SINGLE); - } - - - // Computes a decimal representation with a fixed number of digits after the - // decimal point. The last emitted digit is rounded. - // - // Examples: - // ToFixed(3.12, 1) -> "3.1" - // ToFixed(3.1415, 3) -> "3.142" - // ToFixed(1234.56789, 4) -> "1234.5679" - // ToFixed(1.23, 5) -> "1.23000" - // ToFixed(0.1, 4) -> "0.1000" - // ToFixed(1e30, 2) -> "1000000000000000019884624838656.00" - // ToFixed(0.1, 30) -> "0.100000000000000005551115123126" - // ToFixed(0.1, 17) -> "0.10000000000000001" - // - // If requested_digits equals 0, then the tail of the result depends on - // the EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT. - // Examples, for requested_digits == 0, - // let EMIT_TRAILING_DECIMAL_POINT and EMIT_TRAILING_ZERO_AFTER_POINT be - // - false and false: then 123.45 -> 123 - // 0.678 -> 1 - // - true and false: then 123.45 -> 123. - // 0.678 -> 1. - // - true and true: then 123.45 -> 123.0 - // 0.678 -> 1.0 - // - // Returns true if the conversion succeeds. The conversion always succeeds - // except for the following cases: - // - the input value is special and no infinity_symbol or nan_symbol has - // been provided to the constructor, - // - 'value' > 10^kMaxFixedDigitsBeforePoint, or - // - 'requested_digits' > kMaxFixedDigitsAfterPoint. - // The last two conditions imply that the result will never contain more than - // 1 + kMaxFixedDigitsBeforePoint + 1 + kMaxFixedDigitsAfterPoint characters - // (one additional character for the sign, and one for the decimal point). - bool ToFixed(double value, int requested_digits, StringBuilder * result_builder) const; - - // Computes a representation in exponential format with requested_digits - // after the decimal point. The last emitted digit is rounded. - // If requested_digits equals -1, then the shortest exponential representation - // is computed. - // - // Examples with EMIT_POSITIVE_EXPONENT_SIGN deactivated, and - // exponent_character set to 'e'. 
- // ToExponential(3.12, 1) -> "3.1e0" - // ToExponential(5.0, 3) -> "5.000e0" - // ToExponential(0.001, 2) -> "1.00e-3" - // ToExponential(3.1415, -1) -> "3.1415e0" - // ToExponential(3.1415, 4) -> "3.1415e0" - // ToExponential(3.1415, 3) -> "3.142e0" - // ToExponential(123456789000000, 3) -> "1.235e14" - // ToExponential(1000000000000000019884624838656.0, -1) -> "1e30" - // ToExponential(1000000000000000019884624838656.0, 32) -> - // "1.00000000000000001988462483865600e30" - // ToExponential(1234, 0) -> "1e3" - // - // Returns true if the conversion succeeds. The conversion always succeeds - // except for the following cases: - // - the input value is special and no infinity_symbol or nan_symbol has - // been provided to the constructor, - // - 'requested_digits' > kMaxExponentialDigits. - // The last condition implies that the result will never contain more than - // kMaxExponentialDigits + 8 characters (the sign, the digit before the - // decimal point, the decimal point, the exponent character, the - // exponent's sign, and at most 3 exponent digits). - bool ToExponential(double value, int requested_digits, StringBuilder * result_builder) const; - - // Computes 'precision' leading digits of the given 'value' and returns them - // either in exponential or decimal format, depending on - // max_{leading|trailing}_padding_zeroes_in_precision_mode (given to the - // constructor). - // The last computed digit is rounded. - // - // Example with max_leading_padding_zeroes_in_precision_mode = 6. - // ToPrecision(0.0000012345, 2) -> "0.0000012" - // ToPrecision(0.00000012345, 2) -> "1.2e-7" - // Similarly the converter may add up to - // max_trailing_padding_zeroes_in_precision_mode in precision mode to avoid - // returning an exponential representation. A zero added by the - // EMIT_TRAILING_ZERO_AFTER_POINT flag is counted for this limit. - // Examples for max_trailing_padding_zeroes_in_precision_mode = 1: - // ToPrecision(230.0, 2) -> "230" - // ToPrecision(230.0, 2) -> "230." with EMIT_TRAILING_DECIMAL_POINT. - // ToPrecision(230.0, 2) -> "2.3e2" with EMIT_TRAILING_ZERO_AFTER_POINT. - // Examples for max_trailing_padding_zeroes_in_precision_mode = 3, and no - // EMIT_TRAILING_ZERO_AFTER_POINT: - // ToPrecision(123450.0, 6) -> "123450" - // ToPrecision(123450.0, 5) -> "123450" - // ToPrecision(123450.0, 4) -> "123500" - // ToPrecision(123450.0, 3) -> "123000" - // ToPrecision(123450.0, 2) -> "1.2e5" - // - // Returns true if the conversion succeeds. The conversion always succeeds - // except for the following cases: - // - the input value is special and no infinity_symbol or nan_symbol has - // been provided to the constructor, - // - precision < kMinPrecisionDigits - // - precision > kMaxPrecisionDigits - // The last condition implies that the result will never contain more than - // kMaxPrecisionDigits + 7 characters (the sign, the decimal point, the - // exponent character, the exponent's sign, and at most 3 exponent digits). - bool ToPrecision(double value, int precision, StringBuilder * result_builder) const; - - enum DtoaMode - { - // Produce the shortest correct representation. - // For example the output of 0.299999999999999988897 is (the less accurate - // but correct) 0.3. - SHORTEST, - // Same as SHORTEST, but for single-precision floats. - SHORTEST_SINGLE, - // Produce a fixed number of digits after the decimal point. - // For instance fixed(0.1, 4) becomes 0.1000 - // If the input number is big, the output will be big.
- FIXED, - // Fixed number of digits (independent of the decimal point). - PRECISION - }; - - // The maximal number of digits that are needed to emit a double in base 10. - // A higher precision can be achieved by using more digits, but the shortest - // accurate representation of any double will never use more digits than - // kBase10MaximalLength. - // Note that DoubleToAscii null-terminates its input. So the given buffer - // should be at least kBase10MaximalLength + 1 characters long. - static const int kBase10MaximalLength = 17; - - // Converts the given double 'v' to ascii. 'v' must not be NaN, +Infinity, or - // -Infinity. In SHORTEST_SINGLE-mode this restriction also applies to 'v' - // after it has been cast to a single-precision float. That is, in this - // mode static_cast<float>(v) must not be NaN, +Infinity or -Infinity. - // - // The result should be interpreted as buffer * 10^(point-length). - // - // The output depends on the given mode: - // - SHORTEST: produce the least amount of digits for which the internal - // identity requirement is still satisfied. If the digits are printed - // (together with the correct exponent) then reading this number will give - // 'v' again. The buffer will choose the representation that is closest to - // 'v'. If there are two at the same distance, then the one farther away - // from 0 is chosen (halfway cases - ending with 5 - are rounded up). - // In this mode the 'requested_digits' parameter is ignored. - // - SHORTEST_SINGLE: same as SHORTEST but with single-precision. - // - FIXED: produces digits necessary to print a given number with - // 'requested_digits' digits after the decimal point. The produced digits - // might be too short in which case the caller has to fill the remainder - // with '0's. - // Example: toFixed(0.001, 5) is allowed to return buffer="1", point=-2. - // Halfway cases are rounded towards +/-Infinity (away from 0). The call - // toFixed(0.15, 2) thus returns buffer="2", point=0. - // The returned buffer may contain digits that would be truncated from the - // shortest representation of the input. - // - PRECISION: produces 'requested_digits' where the first digit is not '0'. - // Even though the length of produced digits usually equals - // 'requested_digits', the function is allowed to return fewer digits, in - // which case the caller has to fill the missing digits with '0's. - // Halfway cases are again rounded away from 0. - // DoubleToAscii expects the given buffer to be big enough to hold all - // digits and a terminating null-character. In SHORTEST-mode it expects a - // buffer of at least kBase10MaximalLength + 1. In all other modes the - // requested_digits parameter and the padding-zeroes limit the size of the - // output. Don't forget the decimal point, the exponent character and the - // terminating null-character when computing the maximal output size. - // The given length is only used in debug mode to ensure the buffer is big - // enough. - static void - DoubleToAscii(double v, DtoaMode mode, int requested_digits, char * buffer, int buffer_length, bool * sign, int * length, int * point); - -private: - // Implementation for ToShortest and ToShortestSingle. - bool ToShortestIeeeNumber(double value, StringBuilder * result_builder, DtoaMode mode) const; - - // If the value is a special value (NaN or Infinity) constructs the - // corresponding string using the configured infinity/nan-symbol. - // If either of them is NULL or the value is not special then the - // function returns false.
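The DoubleToAscii contract described above (a digit string plus a decimal-point position, read back as buffer * 10^(point - length)) can be exercised directly; a sketch under the same assumptions as the previous example:

    #include <cstdio>
    #include "double-conversion.h" // the header being deleted here

    int main()
    {
        using double_conversion::DoubleToStringConverter;

        char digits[DoubleToStringConverter::kBase10MaximalLength + 1];
        bool sign;
        int length, point;
        DoubleToStringConverter::DoubleToAscii(
            0.15, DoubleToStringConverter::SHORTEST, 0,
            digits, sizeof(digits), &sign, &length, &point);
        // For 0.15, SHORTEST yields digits = "15", length = 2, point = 0,
        // i.e. the value reads back as 15 * 10^(0 - 2) = 0.15.
        std::printf("%s * 10^%d\n", digits, point - length);
        return 0;
    }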
- bool HandleSpecialValues(double value, StringBuilder * result_builder) const; - // Constructs an exponential representation (i.e. 1.234e56). - // The given exponent assumes a decimal point after the first decimal digit. - void CreateExponentialRepresentation(const char * decimal_digits, int length, int exponent, StringBuilder * result_builder) const; - // Creates a decimal representation (i.e. 1234.5678). - void CreateDecimalRepresentation( - const char * decimal_digits, int length, int decimal_point, int digits_after_point, StringBuilder * result_builder) const; - - const int flags_; - const char * const infinity_symbol_; - const char * const nan_symbol_; - const char exponent_character_; - const int decimal_in_shortest_low_; - const int decimal_in_shortest_high_; - const int max_leading_padding_zeroes_in_precision_mode_; - const int max_trailing_padding_zeroes_in_precision_mode_; - - DISALLOW_IMPLICIT_CONSTRUCTORS(DoubleToStringConverter); -}; - - -class StringToDoubleConverter -{ -public: - // Enumeration for allowing octals and ignoring junk when converting - // strings to numbers. - enum Flags - { - NO_FLAGS = 0, - ALLOW_HEX = 1, - ALLOW_OCTALS = 2, - ALLOW_TRAILING_JUNK = 4, - ALLOW_LEADING_SPACES = 8, - ALLOW_TRAILING_SPACES = 16, - ALLOW_SPACES_AFTER_SIGN = 32 - }; - - // Flags should be a bit-or combination of the possible Flags-enum. - // - NO_FLAGS: no special flags. - // - ALLOW_HEX: recognizes the prefix "0x". Hex numbers may only be integers. - // Ex: StringToDouble("0x1234") -> 4660.0 - // In StringToDouble("0x1234.56") the characters ".56" are trailing - // junk. The result of the call is hence dependent on - // the ALLOW_TRAILING_JUNK flag and/or the junk value. - // With this flag "0x" is a junk-string. Even with ALLOW_TRAILING_JUNK, - // the string will not be parsed as "0" followed by junk. - // - // - ALLOW_OCTALS: recognizes the prefix "0" for octals: - // If a sequence of octal digits starts with '0', then the number is - // read as an octal integer. Octal numbers may only be integers. - // Ex: StringToDouble("01234") -> 668.0 - // StringToDouble("012349") -> 12349.0 // Not a sequence of octal - // // digits. - // In StringToDouble("01234.56") the characters ".56" are trailing - // junk. The result of the call is hence dependent on - // the ALLOW_TRAILING_JUNK flag and/or the junk value. - // In StringToDouble("01234e56") the characters "e56" are trailing - // junk, too. - // - ALLOW_TRAILING_JUNK: ignore trailing characters that are not part of - // a double literal. - // - ALLOW_LEADING_SPACES: skip over leading spaces. - // - ALLOW_TRAILING_SPACES: ignore trailing spaces. - // - ALLOW_SPACES_AFTER_SIGN: ignore spaces after the sign. - // Ex: StringToDouble("- 123.2") -> -123.2. - // StringToDouble("+ 123.2") -> 123.2 - // - // empty_string_value is returned when an empty string is given as input. - // If ALLOW_LEADING_SPACES or ALLOW_TRAILING_SPACES are set, then a string - // containing only spaces is converted to the 'empty_string_value', too. - // - // junk_string_value is returned when - // a) ALLOW_TRAILING_JUNK is not set, and a junk character (a character not - // part of a double-literal) is found. - // b) ALLOW_TRAILING_JUNK is set, but the string does not start with a - // double literal. - // - // infinity_symbol and nan_symbol are strings that are used to detect - // inputs that represent infinity and NaN. They can be null, in which case - // they are ignored. - // The conversion routine first reads any possible signs.
Then it compares the - // following character of the input-string with the first character of - // the infinity, and nan-symbol. If either matches, the function assumes that - // a match has been found, and expects the following input characters to match - // the remaining characters of the special-value symbol. - // This means that the following restrictions apply to special-value symbols: - // - they must not start with signs ('+', or '-'), - // - they must not have the same first character. - // - they must not start with digits. - // - // Examples: - // flags = ALLOW_HEX | ALLOW_TRAILING_JUNK, - // empty_string_value = 0.0, - // junk_string_value = NaN, - // infinity_symbol = "infinity", - // nan_symbol = "nan": - // StringToDouble("0x1234") -> 4660.0. - // StringToDouble("0x1234K") -> 4660.0. - // StringToDouble("") -> 0.0 // empty_string_value. - // StringToDouble(" ") -> NaN // junk_string_value. - // StringToDouble(" 1") -> NaN // junk_string_value. - // StringToDouble("0x") -> NaN // junk_string_value. - // StringToDouble("-123.45") -> -123.45. - // StringToDouble("--123.45") -> NaN // junk_string_value. - // StringToDouble("123e45") -> 123e45. - // StringToDouble("123E45") -> 123e45. - // StringToDouble("123e+45") -> 123e45. - // StringToDouble("123E-45") -> 123e-45. - // StringToDouble("123e") -> 123.0 // trailing junk ignored. - // StringToDouble("123e-") -> 123.0 // trailing junk ignored. - // StringToDouble("+NaN") -> NaN // NaN string literal. - // StringToDouble("-infinity") -> -inf. // infinity literal. - // StringToDouble("Infinity") -> NaN // junk_string_value. - // - // flags = ALLOW_OCTALS | ALLOW_LEADING_SPACES, - // empty_string_value = 0.0, - // junk_string_value = NaN, - // infinity_symbol = NULL, - // nan_symbol = NULL: - // StringToDouble("0x1234") -> NaN // junk_string_value. - // StringToDouble("01234") -> 668.0. - // StringToDouble("") -> 0.0 // empty_string_value. - // StringToDouble(" ") -> 0.0 // empty_string_value. - // StringToDouble(" 1") -> 1.0 - // StringToDouble("0x") -> NaN // junk_string_value. - // StringToDouble("0123e45") -> NaN // junk_string_value. - // StringToDouble("01239E45") -> 1239e45. - // StringToDouble("-infinity") -> NaN // junk_string_value. - // StringToDouble("NaN") -> NaN // junk_string_value. - StringToDoubleConverter( - int flags, double empty_string_value, double junk_string_value, const char * infinity_symbol, const char * nan_symbol) - : flags_(flags) - , empty_string_value_(empty_string_value) - , junk_string_value_(junk_string_value) - , infinity_symbol_(infinity_symbol) - , nan_symbol_(nan_symbol) - { - } - - // Performs the conversion. - // The output parameter 'processed_characters_count' is set to the number - // of characters that have been processed to read the number. - // Spaces that are processed with ALLOW_{LEADING|TRAILING}_SPACES are included - // in the 'processed_characters_count'. Trailing junk is never included. - double StringToDouble(const char * buffer, int length, int * processed_characters_count) const - { - return StringToIeee(buffer, length, processed_characters_count, true); - } - - // Same as StringToDouble but reads a float. - // Note that this is not equivalent to static_cast<float>(StringToDouble(...)) - // due to potential double-rounding.
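The flag examples above map directly onto the parsing entry point; a hedged sketch using the first example block's configuration (junk_string_value chosen as NaN, as in those examples):

    #include <cmath>
    #include <cstdio>
    #include "double-conversion.h" // the header being deleted here

    int main()
    {
        using double_conversion::StringToDoubleConverter;

        int flags = StringToDoubleConverter::ALLOW_HEX
            | StringToDoubleConverter::ALLOW_TRAILING_JUNK;
        StringToDoubleConverter conv(flags,
                                     0.0,          // empty_string_value
                                     std::nan(""), // junk_string_value
                                     "infinity", "nan");

        int used = 0;
        double v = conv.StringToDouble("0x1234K", 7, &used);
        // Hex prefix parsed, trailing "K" ignored and not counted:
        // v == 4660.0, used == 6.
        std::printf("%g (consumed %d chars)\n", v, used);
        return 0;
    }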
- float StringToFloat(const char * buffer, int length, int * processed_characters_count) const - { - return static_cast(StringToIeee(buffer, length, processed_characters_count, false)); - } - -private: - const int flags_; - const double empty_string_value_; - const double junk_string_value_; - const char * const infinity_symbol_; - const char * const nan_symbol_; - - double StringToIeee(const char * buffer, int length, int * processed_characters_count, bool read_as_double) const; - - DISALLOW_IMPLICIT_CONSTRUCTORS(StringToDoubleConverter); -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_DOUBLE_CONVERSION_H_ diff --git a/base/poco/Foundation/src/fast-dtoa.cc b/base/poco/Foundation/src/fast-dtoa.cc deleted file mode 100644 index a58f4d4487a..00000000000 --- a/base/poco/Foundation/src/fast-dtoa.cc +++ /dev/null @@ -1,665 +0,0 @@ -// Copyright 2012 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "fast-dtoa.h" - -#include "cached-powers.h" -#include "diy-fp.h" -#include "ieee.h" - -namespace double_conversion { - -// The minimal and maximal target exponent define the range of w's binary -// exponent, where 'w' is the result of multiplying the input by a cached power -// of ten. -// -// A different range might be chosen on a different platform, to optimize digit -// generation, but a smaller range requires more powers of ten to be cached. -static const int kMinimalTargetExponent = -60; -static const int kMaximalTargetExponent = -32; - - -// Adjusts the last digit of the generated number, and screens out generated -// solutions that may be inaccurate. A solution may be inaccurate if it is -// outside the safe interval, or if we cannot prove that it is closer to the -// input than a neighboring representation of the same length. 
-// -// Input: * buffer containing the digits of too_high / 10^kappa -// * the buffer's length -// * distance_too_high_w == (too_high - w).f() * unit -// * unsafe_interval == (too_high - too_low).f() * unit -// * rest = (too_high - buffer * 10^kappa).f() * unit -// * ten_kappa = 10^kappa * unit -// * unit = the common multiplier -// Output: returns true if the buffer is guaranteed to contain the closest -// representable number to the input. -// Modifies the generated digits in the buffer to approach (round towards) w. -static bool RoundWeed(Vector buffer, - int length, - uint64_t distance_too_high_w, - uint64_t unsafe_interval, - uint64_t rest, - uint64_t ten_kappa, - uint64_t unit) { - uint64_t small_distance = distance_too_high_w - unit; - uint64_t big_distance = distance_too_high_w + unit; - // Let w_low = too_high - big_distance, and - // w_high = too_high - small_distance. - // Note: w_low < w < w_high - // - // The real w (* unit) must lie somewhere inside the interval - // ]w_low; w_high[ (often written as "(w_low; w_high)") - - // Basically the buffer currently contains a number in the unsafe interval - // ]too_low; too_high[ with too_low < w < too_high - // - // too_high - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // ^v 1 unit ^ ^ ^ ^ - // boundary_high --------------------- . . . . - // ^v 1 unit . . . . - // - - - - - - - - - - - - - - - - - - - + - - + - - - - - - . . - // . . ^ . . - // . big_distance . . . - // . . . . rest - // small_distance . . . . - // v . . . . - // w_high - - - - - - - - - - - - - - - - - - . . . . - // ^v 1 unit . . . . - // w ---------------------------------------- . . . . - // ^v 1 unit v . . . - // w_low - - - - - - - - - - - - - - - - - - - - - . . . - // . . v - // buffer --------------------------------------------------+-------+-------- - // . . - // safe_interval . - // v . - // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - . - // ^v 1 unit . - // boundary_low ------------------------- unsafe_interval - // ^v 1 unit v - // too_low - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - // - // - // Note that the value of buffer could lie anywhere inside the range too_low - // to too_high. - // - // boundary_low, boundary_high and w are approximations of the real boundaries - // and v (the input number). They are guaranteed to be precise up to one unit. - // In fact the error is guaranteed to be strictly less than one unit. - // - // Anything that lies outside the unsafe interval is guaranteed not to round - // to v when read again. - // Anything that lies inside the safe interval is guaranteed to round to v - // when read again. - // If the number inside the buffer lies inside the unsafe interval but not - // inside the safe interval then we simply do not know and bail out (returning - // false). - // - // Similarly we have to take into account the imprecision of 'w' when finding - // the closest representation of 'w'. If we have two potential - // representations, and one is closer to both w_low and w_high, then we know - // it is closer to the actual value v. - // - // By generating the digits of too_high we got the largest (closest to - // too_high) buffer that is still in the unsafe interval. In the case where - // w_high < buffer < too_high we try to decrement the buffer. - // This way the buffer approaches (rounds towards) w. 
- // There are 3 conditions that stop the decrementation process: - // 1) the buffer is already below w_high - // 2) decrementing the buffer would make it leave the unsafe interval - // 3) decrementing the buffer would yield a number below w_high and farther - // away than the current number. In other words: - // (buffer{-1} < w_high) && w_high - buffer{-1} > buffer - w_high - // Instead of using the buffer directly we use its distance to too_high. - // Conceptually rest ~= too_high - buffer - // We need to do the following tests in this order to avoid over- and - // underflows. - ASSERT(rest <= unsafe_interval); - while (rest < small_distance && // Negated condition 1 - unsafe_interval - rest >= ten_kappa && // Negated condition 2 - (rest + ten_kappa < small_distance || // buffer{-1} > w_high - small_distance - rest >= rest + ten_kappa - small_distance)) { - buffer[length - 1]--; - rest += ten_kappa; - } - - // We have approached w+ as much as possible. We now test if approaching w- - // would require changing the buffer. If yes, then we have two possible - // representations close to w, but we cannot decide which one is closer. - if (rest < big_distance && - unsafe_interval - rest >= ten_kappa && - (rest + ten_kappa < big_distance || - big_distance - rest > rest + ten_kappa - big_distance)) { - return false; - } - - // Weeding test. - // The safe interval is [too_low + 2 ulp; too_high - 2 ulp] - // Since too_low = too_high - unsafe_interval this is equivalent to - // [too_high - unsafe_interval + 4 ulp; too_high - 2 ulp] - // Conceptually we have: rest ~= too_high - buffer - return (2 * unit <= rest) && (rest <= unsafe_interval - 4 * unit); -} - - -// Rounds the buffer upwards if the result is closer to v by possibly adding -// 1 to the buffer. If the precision of the calculation is not sufficient to -// round correctly, return false. -// The rounding might shift the whole buffer in which case the kappa is -// adjusted. For example "99", kappa = 3 might become "10", kappa = 4. -// -// If 2*rest > ten_kappa then the buffer needs to be round up. -// rest can have an error of +/- 1 unit. This function accounts for the -// imprecision and returns false, if the rounding direction cannot be -// unambiguously determined. -// -// Precondition: rest < ten_kappa. -static bool RoundWeedCounted(Vector buffer, - int length, - uint64_t rest, - uint64_t ten_kappa, - uint64_t unit, - int* kappa) { - ASSERT(rest < ten_kappa); - // The following tests are done in a specific order to avoid overflows. They - // will work correctly with any uint64 values of rest < ten_kappa and unit. - // - // If the unit is too big, then we don't know which way to round. For example - // a unit of 50 means that the real number lies within rest +/- 50. If - // 10^kappa == 40 then there is no way to tell which way to round. - if (unit >= ten_kappa) return false; - // Even if unit is just half the size of 10^kappa we are already completely - // lost. (And after the previous test we know that the expression will not - // over/underflow.) - if (ten_kappa - unit <= unit) return false; - // If 2 * (rest + unit) <= 10^kappa we can safely round down. - if ((ten_kappa - rest > rest) && (ten_kappa - 2 * rest >= 2 * unit)) { - return true; - } - // If 2 * (rest - unit) >= 10^kappa, then we can safely round up. - if ((rest > unit) && (ten_kappa - (rest - unit) <= (rest - unit))) { - // Increment the last digit recursively until we find a non '9' digit. 
- buffer[length - 1]++; - for (int i = length - 1; i > 0; --i) { - if (buffer[i] != '0' + 10) break; - buffer[i] = '0'; - buffer[i - 1]++; - } - // If the first digit is now '0'+ 10 we had a buffer with all '9's. With the - // exception of the first digit all digits are now '0'. Simply switch the - // first digit to '1' and adjust the kappa. Example: "99" becomes "10" and - // the power (the kappa) is increased. - if (buffer[0] == '0' + 10) { - buffer[0] = '1'; - (*kappa) += 1; - } - return true; - } - return false; -} - -// Returns the biggest power of ten that is less than or equal to the given -// number. We furthermore receive the maximum number of bits 'number' has. -// -// Returns power == 10^(exponent_plus_one-1) such that -// power <= number < power * 10. -// If number_bits == 0 then 0^(0-1) is returned. -// The number of bits must be <= 32. -// Precondition: number < (1 << (number_bits + 1)). - -// Inspired by the method for finding an integer log base 10 from here: -// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10 -static unsigned int const kSmallPowersOfTen[] = - {0, 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, - 1000000000}; - -static void BiggestPowerTen(uint32_t number, - int number_bits, - uint32_t* power, - int* exponent_plus_one) { - ASSERT(number < (1u << (number_bits + 1))); - // 1233/4096 is approximately 1/lg(10). - int exponent_plus_one_guess = ((number_bits + 1) * 1233 >> 12); - // We increment to skip over the first entry in the kPowersOf10 table. - // Note: kPowersOf10[i] == 10^(i-1). - exponent_plus_one_guess++; - // We don't have any guarantees that 2^number_bits <= number. - if (number < kSmallPowersOfTen[exponent_plus_one_guess] && exponent_plus_one_guess > 0) { - exponent_plus_one_guess--; - } - *power = kSmallPowersOfTen[exponent_plus_one_guess]; - *exponent_plus_one = exponent_plus_one_guess; -} - -// Generates the digits of input number w. -// w is a floating-point number (DiyFp), consisting of a significand and an -// exponent. Its exponent is bounded by kMinimalTargetExponent and -// kMaximalTargetExponent. -// Hence -60 <= w.e() <= -32. -// -// Returns false if it fails, in which case the generated digits in the buffer -// should not be used. -// Preconditions: -// * low, w and high are correct up to 1 ulp (unit in the last place). That -// is, their error must be less than a unit of their last digits. -// * low.e() == w.e() == high.e() -// * low < w < high, and taking into account their error: low~ <= high~ -// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent -// Postconditions: returns false if procedure fails. -// otherwise: -// * buffer is not null-terminated, but len contains the number of digits. -// * buffer contains the shortest possible decimal digit-sequence -// such that LOW < buffer * 10^kappa < HIGH, where LOW and HIGH are the -// correct values of low and high (without their error). -// * if more than one decimal representation gives the minimal number of -// decimal digits then the one closest to W (where W is the correct value -// of w) is chosen. -// Remark: this procedure takes into account the imprecision of its input -// numbers. If the precision is not enough to guarantee all the postconditions -// then false is returned. This usually happens rarely (~0.5%). -// -// Say, for the sake of example, that -// w.e() == -48, and w.f() == 0x1234567890abcdef -// w's value can be computed by w.f() * 2^w.e() -// We can obtain w's integral digits by simply shifting w.f() by -w.e(). 
-// -> w's integral part is 0x1234 -// w's fractional part is therefore 0x567890abcdef. -// Printing w's integral part is easy (simply print 0x1234 in decimal). -// In order to print its fraction we repeatedly multiply the fraction by 10 and -// get each digit. Example the first digit after the point would be computed by -// (0x567890abcdef * 10) >> 48. -> 3 -// The whole thing becomes slightly more complicated because we want to stop -// once we have enough digits. That is, once the digits inside the buffer -// represent 'w' we can stop. Everything inside the interval low - high -// represents w. However we have to pay attention to low, high and w's -// imprecision. -static bool DigitGen(DiyFp low, - DiyFp w, - DiyFp high, - Vector buffer, - int* length, - int* kappa) { - ASSERT(low.e() == w.e() && w.e() == high.e()); - ASSERT(low.f() + 1 <= high.f() - 1); - ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent); - // low, w and high are imprecise, but by less than one ulp (unit in the last - // place). - // If we remove (resp. add) 1 ulp from low (resp. high) we are certain that - // the new numbers are outside of the interval we want the final - // representation to lie in. - // Inversely adding (resp. removing) 1 ulp from low (resp. high) would yield - // numbers that are certain to lie in the interval. We will use this fact - // later on. - // We will now start by generating the digits within the uncertain - // interval. Later we will weed out representations that lie outside the safe - // interval and thus _might_ lie outside the correct interval. - uint64_t unit = 1; - DiyFp too_low = DiyFp(low.f() - unit, low.e()); - DiyFp too_high = DiyFp(high.f() + unit, high.e()); - // too_low and too_high are guaranteed to lie outside the interval we want the - // generated number in. - DiyFp unsafe_interval = DiyFp::Minus(too_high, too_low); - // We now cut the input number into two parts: the integral digits and the - // fractionals. We will not write any decimal separator though, but adapt - // kappa instead. - // Reminder: we are currently computing the digits (stored inside the buffer) - // such that: too_low < buffer * 10^kappa < too_high - // We use too_high for the digit_generation and stop as soon as possible. - // If we stop early we effectively round down. - DiyFp one = DiyFp(static_cast(1) << -w.e(), w.e()); - // Division by one is a shift. - uint32_t integrals = static_cast(too_high.f() >> -one.e()); - // Modulo by one is an and. - uint64_t fractionals = too_high.f() & (one.f() - 1); - uint32_t divisor; - int divisor_exponent_plus_one; - BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()), - &divisor, &divisor_exponent_plus_one); - *kappa = divisor_exponent_plus_one; - *length = 0; - // Loop invariant: buffer = too_high / 10^kappa (integer division) - // The invariant holds for the first iteration: kappa has been initialized - // with the divisor exponent + 1. And the divisor is the biggest power of ten - // that is smaller than integrals. - while (*kappa > 0) { - int digit = integrals / divisor; - ASSERT(digit <= 9); - buffer[*length] = static_cast('0' + digit); - (*length)++; - integrals %= divisor; - (*kappa)--; - // Note that kappa now equals the exponent of the divisor and that the - // invariant thus holds again. 
- uint64_t rest = - (static_cast(integrals) << -one.e()) + fractionals; - // Invariant: too_high = buffer * 10^kappa + DiyFp(rest, one.e()) - // Reminder: unsafe_interval.e() == one.e() - if (rest < unsafe_interval.f()) { - // Rounding down (by not emitting the remaining digits) yields a number - // that lies within the unsafe interval. - return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f(), - unsafe_interval.f(), rest, - static_cast(divisor) << -one.e(), unit); - } - divisor /= 10; - } - - // The integrals have been generated. We are at the point of the decimal - // separator. In the following loop we simply multiply the remaining digits by - // 10 and divide by one. We just need to pay attention to multiply associated - // data (like the interval or 'unit'), too. - // Note that the multiplication by 10 does not overflow, because w.e >= -60 - // and thus one.e >= -60. - ASSERT(one.e() >= -60); - ASSERT(fractionals < one.f()); - ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f()); - for (;;) { - fractionals *= 10; - unit *= 10; - unsafe_interval.set_f(unsafe_interval.f() * 10); - // Integer division by one. - int digit = static_cast(fractionals >> -one.e()); - ASSERT(digit <= 9); - buffer[*length] = static_cast('0' + digit); - (*length)++; - fractionals &= one.f() - 1; // Modulo by one. - (*kappa)--; - if (fractionals < unsafe_interval.f()) { - return RoundWeed(buffer, *length, DiyFp::Minus(too_high, w).f() * unit, - unsafe_interval.f(), fractionals, one.f(), unit); - } - } -} - - - -// Generates (at most) requested_digits digits of input number w. -// w is a floating-point number (DiyFp), consisting of a significand and an -// exponent. Its exponent is bounded by kMinimalTargetExponent and -// kMaximalTargetExponent. -// Hence -60 <= w.e() <= -32. -// -// Returns false if it fails, in which case the generated digits in the buffer -// should not be used. -// Preconditions: -// * w is correct up to 1 ulp (unit in the last place). That -// is, its error must be strictly less than a unit of its last digit. -// * kMinimalTargetExponent <= w.e() <= kMaximalTargetExponent -// -// Postconditions: returns false if procedure fails. -// otherwise: -// * buffer is not null-terminated, but length contains the number of -// digits. -// * the representation in buffer is the most precise representation of -// requested_digits digits. -// * buffer contains at most requested_digits digits of w. If there are less -// than requested_digits digits then some trailing '0's have been removed. -// * kappa is such that -// w = buffer * 10^kappa + eps with |eps| < 10^kappa / 2. -// -// Remark: This procedure takes into account the imprecision of its input -// numbers. If the precision is not enough to guarantee all the postconditions -// then false is returned. This usually happens rarely, but the failure-rate -// increases with higher requested_digits. -static bool DigitGenCounted(DiyFp w, - int requested_digits, - Vector buffer, - int* length, - int* kappa) { - ASSERT(kMinimalTargetExponent <= w.e() && w.e() <= kMaximalTargetExponent); - ASSERT(kMinimalTargetExponent >= -60); - ASSERT(kMaximalTargetExponent <= -32); - // w is assumed to have an error less than 1 unit. Whenever w is scaled we - // also scale its error. - uint64_t w_error = 1; - // We cut the input number into two parts: the integral digits and the - // fractional digits. We don't emit any decimal separator, but adapt kappa - // instead. Example: instead of writing "1.2" we put "12" into the buffer and - // increase kappa by 1. 
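The fixed-point split performed next (and earlier in DigitGen) rests on w.e() being in [-60, -32]: "one" represents 1.0 as 2^-e, so division by one is a shift and modulo by one is a mask. A standalone sketch using the 0x1234567890abcdef walkthrough values from the DigitGen comment above:

#include <cstdint>
#include <cstdio>

int main()
{
    // A DiyFp-style value: significand f with binary exponent e in [-60, -32].
    uint64_t f = 0x1234567890ABCDEFULL;
    int e = -48;

    // "one" == 1.0 in this fixed-point format: significand 2^-e, exponent e.
    uint64_t one_f = uint64_t{1} << -e;

    uint64_t integrals = f >> -e;           // division by one is a shift
    uint64_t fractionals = f & (one_f - 1); // modulo by one is an and

    std::printf("integral part:   0x%llx\n", (unsigned long long)integrals);   // 0x1234
    std::printf("fractional part: 0x%llx\n", (unsigned long long)fractionals); // 0x567890abcdef

    // First decimal digit after the point: multiply the fraction by 10 and
    // take the integral part, exactly as the digit-generation loops do.
    int digit = (int)((fractionals * 10) >> -e);
    std::printf("first fractional digit: %d\n", digit); // 3
    return 0;
}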
- DiyFp one = DiyFp(static_cast(1) << -w.e(), w.e()); - // Division by one is a shift. - uint32_t integrals = static_cast(w.f() >> -one.e()); - // Modulo by one is an and. - uint64_t fractionals = w.f() & (one.f() - 1); - uint32_t divisor; - int divisor_exponent_plus_one; - BiggestPowerTen(integrals, DiyFp::kSignificandSize - (-one.e()), - &divisor, &divisor_exponent_plus_one); - *kappa = divisor_exponent_plus_one; - *length = 0; - - // Loop invariant: buffer = w / 10^kappa (integer division) - // The invariant holds for the first iteration: kappa has been initialized - // with the divisor exponent + 1. And the divisor is the biggest power of ten - // that is smaller than 'integrals'. - while (*kappa > 0) { - int digit = integrals / divisor; - ASSERT(digit <= 9); - buffer[*length] = static_cast('0' + digit); - (*length)++; - requested_digits--; - integrals %= divisor; - (*kappa)--; - // Note that kappa now equals the exponent of the divisor and that the - // invariant thus holds again. - if (requested_digits == 0) break; - divisor /= 10; - } - - if (requested_digits == 0) { - uint64_t rest = - (static_cast(integrals) << -one.e()) + fractionals; - return RoundWeedCounted(buffer, *length, rest, - static_cast(divisor) << -one.e(), w_error, - kappa); - } - - // The integrals have been generated. We are at the point of the decimal - // separator. In the following loop we simply multiply the remaining digits by - // 10 and divide by one. We just need to pay attention to multiply associated - // data (the 'unit'), too. - // Note that the multiplication by 10 does not overflow, because w.e >= -60 - // and thus one.e >= -60. - ASSERT(one.e() >= -60); - ASSERT(fractionals < one.f()); - ASSERT(UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF) / 10 >= one.f()); - while (requested_digits > 0 && fractionals > w_error) { - fractionals *= 10; - w_error *= 10; - // Integer division by one. - int digit = static_cast(fractionals >> -one.e()); - ASSERT(digit <= 9); - buffer[*length] = static_cast('0' + digit); - (*length)++; - requested_digits--; - fractionals &= one.f() - 1; // Modulo by one. - (*kappa)--; - } - if (requested_digits != 0) return false; - return RoundWeedCounted(buffer, *length, fractionals, one.f(), w_error, - kappa); -} - - -// Provides a decimal representation of v. -// Returns true if it succeeds, otherwise the result cannot be trusted. -// There will be *length digits inside the buffer (not null-terminated). -// If the function returns true then -// v == (double) (buffer * 10^decimal_exponent). -// The digits in the buffer are the shortest representation possible: no -// 0.09999999999999999 instead of 0.1. The shorter representation will even be -// chosen even if the longer one would be closer to v. -// The last digit will be closest to the actual v. That is, even if several -// digits might correctly yield 'v' when read again, the closest will be -// computed. -static bool Grisu3(double v, - FastDtoaMode mode, - Vector buffer, - int* length, - int* decimal_exponent) { - DiyFp w = Double(v).AsNormalizedDiyFp(); - // boundary_minus and boundary_plus are the boundaries between v and its - // closest floating-point neighbors. Any number strictly between - // boundary_minus and boundary_plus will round to v when convert to a double. - // Grisu3 will never output representations that lie exactly on a boundary. 
-  DiyFp boundary_minus, boundary_plus;
-  if (mode == FAST_DTOA_SHORTEST) {
-    Double(v).NormalizedBoundaries(&boundary_minus, &boundary_plus);
-  } else {
-    ASSERT(mode == FAST_DTOA_SHORTEST_SINGLE);
-    float single_v = static_cast<float>(v);
-    Single(single_v).NormalizedBoundaries(&boundary_minus, &boundary_plus);
-  }
-  ASSERT(boundary_plus.e() == w.e());
-  DiyFp ten_mk;  // Cached power of ten: 10^-k
-  int mk;        // -k
-  int ten_mk_minimal_binary_exponent =
-      kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize);
-  int ten_mk_maximal_binary_exponent =
-      kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize);
-  PowersOfTenCache::GetCachedPowerForBinaryExponentRange(
-      ten_mk_minimal_binary_exponent,
-      ten_mk_maximal_binary_exponent,
-      &ten_mk, &mk);
-  ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() +
-          DiyFp::kSignificandSize) &&
-         (kMaximalTargetExponent >= w.e() + ten_mk.e() +
-          DiyFp::kSignificandSize));
-  // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a
-  // 64 bit significand and ten_mk is thus only precise up to 64 bits.
-
-  // The DiyFp::Times procedure rounds its result, and ten_mk is approximated
-  // too. The variable scaled_w (as well as scaled_boundary_minus/plus) is now
-  // off by a small amount.
-  // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w.
-  // In other words: let f = scaled_w.f() and e = scaled_w.e(), then
-  //   (f-1) * 2^e < w*10^k < (f+1) * 2^e
-  DiyFp scaled_w = DiyFp::Times(w, ten_mk);
-  ASSERT(scaled_w.e() ==
-         boundary_plus.e() + ten_mk.e() + DiyFp::kSignificandSize);
-  // In theory it would be possible to avoid some recomputations by computing
-  // the difference between w and boundary_minus/plus (a power of 2) and to
-  // compute scaled_boundary_minus/plus by subtracting/adding from
-  // scaled_w. However the code becomes much less readable and the speed
-  // enhancements are not terrific.
-  DiyFp scaled_boundary_minus = DiyFp::Times(boundary_minus, ten_mk);
-  DiyFp scaled_boundary_plus = DiyFp::Times(boundary_plus, ten_mk);
-
-  // DigitGen will generate the digits of scaled_w. Therefore we have
-  //   v == (double) (scaled_w * 10^-mk).
-  // Set decimal_exponent == -mk and pass it to DigitGen. If scaled_w is not an
-  // integer then it will be updated. For instance if scaled_w == 1.23 then
-  // the buffer will be filled with "123" and the decimal_exponent will be
-  // decreased by 2.
-  int kappa;
-  bool result = DigitGen(scaled_boundary_minus, scaled_w, scaled_boundary_plus,
-                         buffer, length, &kappa);
-  *decimal_exponent = -mk + kappa;
-  return result;
-}
-
-
-// The "counted" version of grisu3 (see above) only generates requested_digits
-// number of digits. This version does not generate the shortest representation,
-// and with enough requested digits 0.1 will at some point print as 0.9999999...
-// Grisu3 is too imprecise for real halfway cases (1.5 will not work) and
-// therefore the rounding strategy for halfway cases is irrelevant.
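For comparison, the shortest-round-trip contract that the shortest modes above implement is the same one C++17's std::to_chars exposes for floating point; a small sketch of the guarantee (not part of the deleted code, and assuming a correctly rounded strtod for the read-back):

#include <charconv>
#include <cstdio>
#include <cstdlib>

int main()
{
    double v = 0.1;
    char buf[32];
    // Shortest decimal string that still reads back as exactly v.
    auto [ptr, ec] = std::to_chars(buf, buf + sizeof buf, v);
    *ptr = '\0';
    double back = std::strtod(buf, nullptr);
    std::printf("%s round-trips: %s\n", buf, back == v ? "yes" : "no"); // 0.1 round-trips: yes
    return 0;
}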
-static bool Grisu3Counted(double v, - int requested_digits, - Vector buffer, - int* length, - int* decimal_exponent) { - DiyFp w = Double(v).AsNormalizedDiyFp(); - DiyFp ten_mk; // Cached power of ten: 10^-k - int mk; // -k - int ten_mk_minimal_binary_exponent = - kMinimalTargetExponent - (w.e() + DiyFp::kSignificandSize); - int ten_mk_maximal_binary_exponent = - kMaximalTargetExponent - (w.e() + DiyFp::kSignificandSize); - PowersOfTenCache::GetCachedPowerForBinaryExponentRange( - ten_mk_minimal_binary_exponent, - ten_mk_maximal_binary_exponent, - &ten_mk, &mk); - ASSERT((kMinimalTargetExponent <= w.e() + ten_mk.e() + - DiyFp::kSignificandSize) && - (kMaximalTargetExponent >= w.e() + ten_mk.e() + - DiyFp::kSignificandSize)); - // Note that ten_mk is only an approximation of 10^-k. A DiyFp only contains a - // 64 bit significand and ten_mk is thus only precise up to 64 bits. - - // The DiyFp::Times procedure rounds its result, and ten_mk is approximated - // too. The variable scaled_w (as well as scaled_boundary_minus/plus) are now - // off by a small amount. - // In fact: scaled_w - w*10^k < 1ulp (unit in the last place) of scaled_w. - // In other words: let f = scaled_w.f() and e = scaled_w.e(), then - // (f-1) * 2^e < w*10^k < (f+1) * 2^e - DiyFp scaled_w = DiyFp::Times(w, ten_mk); - - // We now have (double) (scaled_w * 10^-mk). - // DigitGen will generate the first requested_digits digits of scaled_w and - // return together with a kappa such that scaled_w ~= buffer * 10^kappa. (It - // will not always be exactly the same since DigitGenCounted only produces a - // limited number of digits.) - int kappa; - bool result = DigitGenCounted(scaled_w, requested_digits, - buffer, length, &kappa); - *decimal_exponent = -mk + kappa; - return result; -} - - -bool FastDtoa(double v, - FastDtoaMode mode, - int requested_digits, - Vector buffer, - int* length, - int* decimal_point) { - ASSERT(v > 0); - ASSERT(!Double(v).IsSpecial()); - - bool result = false; - int decimal_exponent = 0; - switch (mode) { - case FAST_DTOA_SHORTEST: - case FAST_DTOA_SHORTEST_SINGLE: - result = Grisu3(v, mode, buffer, length, &decimal_exponent); - break; - case FAST_DTOA_PRECISION: - result = Grisu3Counted(v, requested_digits, - buffer, length, &decimal_exponent); - break; - default: - UNREACHABLE(); - } - if (result) { - *decimal_point = *length + decimal_exponent; - buffer[*length] = '\0'; - } - return result; -} - -} // namespace double_conversion diff --git a/base/poco/Foundation/src/fast-dtoa.h b/base/poco/Foundation/src/fast-dtoa.h deleted file mode 100644 index dc3be8b71b0..00000000000 --- a/base/poco/Foundation/src/fast-dtoa.h +++ /dev/null @@ -1,85 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_FAST_DTOA_H_ -#define DOUBLE_CONVERSION_FAST_DTOA_H_ - -#include "utils.h" - -namespace double_conversion -{ - -enum FastDtoaMode -{ - // Computes the shortest representation of the given input. The returned - // result will be the most accurate number of this length. Longer - // representations might be more accurate. - FAST_DTOA_SHORTEST, - // Same as FAST_DTOA_SHORTEST but for single-precision floats. - FAST_DTOA_SHORTEST_SINGLE, - // Computes a representation where the precision (number of digits) is - // given as input. The precision is independent of the decimal point. - FAST_DTOA_PRECISION -}; - -// FastDtoa will produce at most kFastDtoaMaximalLength digits. This does not -// include the terminating '\0' character. -static const int kFastDtoaMaximalLength = 17; -// Same for single-precision numbers. -static const int kFastDtoaMaximalSingleLength = 9; - -// Provides a decimal representation of v. -// The result should be interpreted as buffer * 10^(point - length). -// -// Precondition: -// * v must be a strictly positive finite double. -// -// Returns true if it succeeds, otherwise the result can not be trusted. -// There will be *length digits inside the buffer followed by a null terminator. -// If the function returns true and mode equals -// - FAST_DTOA_SHORTEST, then -// the parameter requested_digits is ignored. -// The result satisfies -// v == (double) (buffer * 10^(point - length)). -// The digits in the buffer are the shortest representation possible. E.g. -// if 0.099999999999 and 0.1 represent the same double then "1" is returned -// with point = 0. -// The last digit will be closest to the actual v. That is, even if several -// digits might correctly yield 'v' when read again, the buffer will contain -// the one closest to v. -// - FAST_DTOA_PRECISION, then -// the buffer contains requested_digits digits. -// the difference v - (buffer * 10^(point-length)) is closest to zero for -// all possible representations of requested_digits digits. -// If there are two values that are equally close, then FastDtoa returns -// false. -// For both modes the buffer must be large enough to hold the result. -bool FastDtoa(double d, FastDtoaMode mode, int requested_digits, Vector buffer, int * length, int * decimal_point); - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_FAST_DTOA_H_ diff --git a/base/poco/Foundation/src/fixed-dtoa.cc b/base/poco/Foundation/src/fixed-dtoa.cc deleted file mode 100644 index 390e823d95e..00000000000 --- a/base/poco/Foundation/src/fixed-dtoa.cc +++ /dev/null @@ -1,404 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. 
-// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include - -#include "fixed-dtoa.h" -#include "ieee.h" - -namespace double_conversion { - -// Represents a 128bit type. This class should be replaced by a native type on -// platforms that support 128bit integers. -class UInt128 { - public: - UInt128() : high_bits_(0), low_bits_(0) { } - UInt128(uint64_t high, uint64_t low) : high_bits_(high), low_bits_(low) { } - - void Multiply(uint32_t multiplicand) { - uint64_t accumulator; - - accumulator = (low_bits_ & kMask32) * multiplicand; - uint32_t part = static_cast(accumulator & kMask32); - accumulator >>= 32; - accumulator = accumulator + (low_bits_ >> 32) * multiplicand; - low_bits_ = (accumulator << 32) + part; - accumulator >>= 32; - accumulator = accumulator + (high_bits_ & kMask32) * multiplicand; - part = static_cast(accumulator & kMask32); - accumulator >>= 32; - accumulator = accumulator + (high_bits_ >> 32) * multiplicand; - high_bits_ = (accumulator << 32) + part; - ASSERT((accumulator >> 32) == 0); - } - - void Shift(int shift_amount) { - ASSERT(-64 <= shift_amount && shift_amount <= 64); - if (shift_amount == 0) { - return; - } else if (shift_amount == -64) { - high_bits_ = low_bits_; - low_bits_ = 0; - } else if (shift_amount == 64) { - low_bits_ = high_bits_; - high_bits_ = 0; - } else if (shift_amount <= 0) { - high_bits_ <<= -shift_amount; - high_bits_ += low_bits_ >> (64 + shift_amount); - low_bits_ <<= -shift_amount; - } else { - low_bits_ >>= shift_amount; - low_bits_ += high_bits_ << (64 - shift_amount); - high_bits_ >>= shift_amount; - } - } - - // Modifies *this to *this MOD (2^power). - // Returns *this DIV (2^power). 
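The class comment above already suggests a native 128-bit type where available; the DIV/MOD-by-2^power contract just stated can be cross-checked against GCC/Clang's unsigned __int128, which is a compiler extension assumed here purely for illustration:

#include <cassert>
#include <cstdint>
#include <cstdio>

int main()
{
    // value == (high << 64) + low, mirroring UInt128's representation.
    uint64_t high = 0x0000000000000005ULL;
    uint64_t low = 0xDEADBEEFDEADBEEFULL;
    unsigned __int128 value = ((unsigned __int128)high << 64) | low;

    int power = 66;
    // DIV is the bits at and above 'power'; MOD is everything below.
    unsigned __int128 quotient = value >> power;
    unsigned __int128 remainder = value & (((unsigned __int128)1 << power) - 1);

    assert(value == (quotient << power) + remainder);
    std::printf("quotient = %llu\n", (unsigned long long)quotient); // 1, i.e. high >> 2
    return 0;
}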
- int DivModPowerOf2(int power) { - if (power >= 64) { - int result = static_cast(high_bits_ >> (power - 64)); - high_bits_ -= static_cast(result) << (power - 64); - return result; - } else { - uint64_t part_low = low_bits_ >> power; - uint64_t part_high = high_bits_ << (64 - power); - int result = static_cast(part_low + part_high); - high_bits_ = 0; - low_bits_ -= part_low << power; - return result; - } - } - - bool IsZero() const { - return high_bits_ == 0 && low_bits_ == 0; - } - - int BitAt(int position) { - if (position >= 64) { - return static_cast(high_bits_ >> (position - 64)) & 1; - } else { - return static_cast(low_bits_ >> position) & 1; - } - } - - private: - static const uint64_t kMask32 = 0xFFFFFFFF; - // Value == (high_bits_ << 64) + low_bits_ - uint64_t high_bits_; - uint64_t low_bits_; -}; - - -static const int kDoubleSignificandSize = 53; // Includes the hidden bit. - - -static void FillDigits32FixedLength(uint32_t number, int requested_length, - Vector buffer, int* length) { - for (int i = requested_length - 1; i >= 0; --i) { - buffer[(*length) + i] = '0' + number % 10; - number /= 10; - } - *length += requested_length; -} - - -static void FillDigits32(uint32_t number, Vector buffer, int* length) { - int number_length = 0; - // We fill the digits in reverse order and exchange them afterwards. - while (number != 0) { - int digit = number % 10; - number /= 10; - buffer[(*length) + number_length] = static_cast('0' + digit); - number_length++; - } - // Exchange the digits. - int i = *length; - int j = *length + number_length - 1; - while (i < j) { - char tmp = buffer[i]; - buffer[i] = buffer[j]; - buffer[j] = tmp; - i++; - j--; - } - *length += number_length; -} - - -static void FillDigits64FixedLength(uint64_t number, - Vector buffer, int* length) { - const uint32_t kTen7 = 10000000; - // For efficiency cut the number into 3 uint32_t parts, and print those. - uint32_t part2 = static_cast(number % kTen7); - number /= kTen7; - uint32_t part1 = static_cast(number % kTen7); - uint32_t part0 = static_cast(number / kTen7); - - FillDigits32FixedLength(part0, 3, buffer, length); - FillDigits32FixedLength(part1, 7, buffer, length); - FillDigits32FixedLength(part2, 7, buffer, length); -} - - -static void FillDigits64(uint64_t number, Vector buffer, int* length) { - const uint32_t kTen7 = 10000000; - // For efficiency cut the number into 3 uint32_t parts, and print those. - uint32_t part2 = static_cast(number % kTen7); - number /= kTen7; - uint32_t part1 = static_cast(number % kTen7); - uint32_t part0 = static_cast(number / kTen7); - - if (part0 != 0) { - FillDigits32(part0, buffer, length); - FillDigits32FixedLength(part1, 7, buffer, length); - FillDigits32FixedLength(part2, 7, buffer, length); - } else if (part1 != 0) { - FillDigits32(part1, buffer, length); - FillDigits32FixedLength(part2, 7, buffer, length); - } else { - FillDigits32(part2, buffer, length); - } -} - - -static void RoundUp(Vector buffer, int* length, int* decimal_point) { - // An empty buffer represents 0. - if (*length == 0) { - buffer[0] = '1'; - *decimal_point = 1; - *length = 1; - return; - } - // Round the last digit until we either have a digit that was not '9' or until - // we reached the first digit. - buffer[(*length) - 1]++; - for (int i = (*length) - 1; i > 0; --i) { - if (buffer[i] != '0' + 10) { - return; - } - buffer[i] = '0'; - buffer[i - 1]++; - } - // If the first digit is now '0' + 10, we would need to set it to '0' and add - // a '1' in front. 
However we reach the first digit only if all following - // digits had been '9' before rounding up. Now all trailing digits are '0' and - // we simply switch the first digit to '1' and update the decimal-point - // (indicating that the point is now one digit to the right). - if (buffer[0] == '0' + 10) { - buffer[0] = '1'; - (*decimal_point)++; - } -} - - -// The given fractionals number represents a fixed-point number with binary -// point at bit (-exponent). -// Preconditions: -// -128 <= exponent <= 0. -// 0 <= fractionals * 2^exponent < 1 -// The buffer holds the result. -// The function will round its result. During the rounding-process digits not -// generated by this function might be updated, and the decimal-point variable -// might be updated. If this function generates the digits 99 and the buffer -// already contained "199" (thus yielding a buffer of "19999") then a -// rounding-up will change the contents of the buffer to "20000". -static void FillFractionals(uint64_t fractionals, int exponent, - int fractional_count, Vector buffer, - int* length, int* decimal_point) { - ASSERT(-128 <= exponent && exponent <= 0); - // 'fractionals' is a fixed-point number, with binary point at bit - // (-exponent). Inside the function the non-converted remainder of fractionals - // is a fixed-point number, with binary point at bit 'point'. - if (-exponent <= 64) { - // One 64 bit number is sufficient. - ASSERT(fractionals >> 56 == 0); - int point = -exponent; - for (int i = 0; i < fractional_count; ++i) { - if (fractionals == 0) break; - // Instead of multiplying by 10 we multiply by 5 and adjust the point - // location. This way the fractionals variable will not overflow. - // Invariant at the beginning of the loop: fractionals < 2^point. - // Initially we have: point <= 64 and fractionals < 2^56 - // After each iteration the point is decremented by one. - // Note that 5^3 = 125 < 128 = 2^7. - // Therefore three iterations of this loop will not overflow fractionals - // (even without the subtraction at the end of the loop body). At this - // time point will satisfy point <= 61 and therefore fractionals < 2^point - // and any further multiplication of fractionals by 5 will not overflow. - fractionals *= 5; - point--; - int digit = static_cast(fractionals >> point); - ASSERT(digit <= 9); - buffer[*length] = static_cast('0' + digit); - (*length)++; - fractionals -= static_cast(digit) << point; - } - // If the first bit after the point is set we have to round up. - if (((fractionals >> (point - 1)) & 1) == 1) { - RoundUp(buffer, length, decimal_point); - } - } else { // We need 128 bits. - ASSERT(64 < -exponent && -exponent <= 128); - UInt128 fractionals128 = UInt128(fractionals, 0); - fractionals128.Shift(-exponent - 64); - int point = 128; - for (int i = 0; i < fractional_count; ++i) { - if (fractionals128.IsZero()) break; - // As before: instead of multiplying by 10 we multiply by 5 and adjust the - // point location. - // This multiplication will not overflow for the same reasons as before. - fractionals128.Multiply(5); - point--; - int digit = fractionals128.DivModPowerOf2(point); - ASSERT(digit <= 9); - buffer[*length] = static_cast('0' + digit); - (*length)++; - } - if (fractionals128.BitAt(point - 1) == 1) { - RoundUp(buffer, length, decimal_point); - } - } -} - - -// Removes leading and trailing zeros. -// If leading zeros are removed then the decimal point position is adjusted. 
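A worked restatement of that contract with illustrative values, using the encoding documented in fast-dtoa.h above (value == buffer * 10^(decimal_point - length)):

#include <cstdio>
#include <cstring>

// Re-statement of the TrimZeros contract for illustration only.
static void trim_zeros(char * buffer, int * length, int * decimal_point)
{
    while (*length > 0 && buffer[*length - 1] == '0')
        --*length; // trailing zeros change nothing
    int lead = 0;
    while (lead < *length && buffer[lead] == '0')
        ++lead;
    if (lead != 0)
    {
        std::memmove(buffer, buffer + lead, *length - lead);
        *length -= lead;
        *decimal_point -= lead; // keeps the encoded value unchanged
    }
}

int main()
{
    char buf[] = "0103500";
    int length = 7, decimal_point = 2; // encodes 103500 * 10^(2-7) == 1.035
    trim_zeros(buf, &length, &decimal_point);
    // Prints "1035, decimal_point=1", i.e. 1035 * 10^(1-4) == 1.035 still.
    std::printf("%.*s, decimal_point=%d\n", length, buf, decimal_point);
    return 0;
}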
-static void TrimZeros(Vector buffer, int* length, int* decimal_point) { - while (*length > 0 && buffer[(*length) - 1] == '0') { - (*length)--; - } - int first_non_zero = 0; - while (first_non_zero < *length && buffer[first_non_zero] == '0') { - first_non_zero++; - } - if (first_non_zero != 0) { - for (int i = first_non_zero; i < *length; ++i) { - buffer[i - first_non_zero] = buffer[i]; - } - *length -= first_non_zero; - *decimal_point -= first_non_zero; - } -} - - -bool FastFixedDtoa(double v, - int fractional_count, - Vector buffer, - int* length, - int* decimal_point) { - const uint32_t kMaxUInt32 = 0xFFFFFFFF; - uint64_t significand = Double(v).Significand(); - int exponent = Double(v).Exponent(); - // v = significand * 2^exponent (with significand a 53bit integer). - // If the exponent is larger than 20 (i.e. we may have a 73bit number) then we - // don't know how to compute the representation. 2^73 ~= 9.5*10^21. - // If necessary this limit could probably be increased, but we don't need - // more. - if (exponent > 20) return false; - if (fractional_count > 20) return false; - *length = 0; - // At most kDoubleSignificandSize bits of the significand are non-zero. - // Given a 64 bit integer we have 11 0s followed by 53 potentially non-zero - // bits: 0..11*..0xxx..53*..xx - if (exponent + kDoubleSignificandSize > 64) { - // The exponent must be > 11. - // - // We know that v = significand * 2^exponent. - // And the exponent > 11. - // We simplify the task by dividing v by 10^17. - // The quotient delivers the first digits, and the remainder fits into a 64 - // bit number. - // Dividing by 10^17 is equivalent to dividing by 5^17*2^17. - const uint64_t kFive17 = UINT64_2PART_C(0xB1, A2BC2EC5); // 5^17 - uint64_t divisor = kFive17; - int divisor_power = 17; - uint64_t dividend = significand; - uint32_t quotient; - uint64_t remainder; - // Let v = f * 2^e with f == significand and e == exponent. - // Then need q (quotient) and r (remainder) as follows: - // v = q * 10^17 + r - // f * 2^e = q * 10^17 + r - // f * 2^e = q * 5^17 * 2^17 + r - // If e > 17 then - // f * 2^(e-17) = q * 5^17 + r/2^17 - // else - // f = q * 5^17 * 2^(17-e) + r/2^e - if (exponent > divisor_power) { - // We only allow exponents of up to 20 and therefore (17 - e) <= 3 - dividend <<= exponent - divisor_power; - quotient = static_cast(dividend / divisor); - remainder = (dividend % divisor) << divisor_power; - } else { - divisor <<= divisor_power - exponent; - quotient = static_cast(dividend / divisor); - remainder = (dividend % divisor) << exponent; - } - FillDigits32(quotient, buffer, length); - FillDigits64FixedLength(remainder, buffer, length); - *decimal_point = *length; - } else if (exponent >= 0) { - // 0 <= exponent <= 11 - significand <<= exponent; - FillDigits64(significand, buffer, length); - *decimal_point = *length; - } else if (exponent > -kDoubleSignificandSize) { - // We have to cut the number. - uint64_t integrals = significand >> -exponent; - uint64_t fractionals = significand - (integrals << -exponent); - if (integrals > kMaxUInt32) { - FillDigits64(integrals, buffer, length); - } else { - FillDigits32(static_cast(integrals), buffer, length); - } - *decimal_point = *length; - FillFractionals(fractionals, exponent, fractional_count, - buffer, length, decimal_point); - } else if (exponent < -128) { - // This configuration (with at most 20 digits) means that all digits must be - // 0. 
-    ASSERT(fractional_count <= 20);
-    buffer[0] = '\0';
-    *length = 0;
-    *decimal_point = -fractional_count;
-  } else {
-    *decimal_point = 0;
-    FillFractionals(significand, exponent, fractional_count,
-                    buffer, length, decimal_point);
-  }
-  TrimZeros(buffer, length, decimal_point);
-  buffer[*length] = '\0';
-  if ((*length) == 0) {
-    // The string is empty and the decimal_point thus has no importance. Mimic
-    // Gay's dtoa and set it to -fractional_count.
-    *decimal_point = -fractional_count;
-  }
-  return true;
-}
-
-} // namespace double_conversion
diff --git a/base/poco/Foundation/src/fixed-dtoa.h b/base/poco/Foundation/src/fixed-dtoa.h
deleted file mode 100644
index c39eecce51f..00000000000
--- a/base/poco/Foundation/src/fixed-dtoa.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright 2010 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-//       copyright notice, this list of conditions and the following
-//       disclaimer in the documentation and/or other materials provided
-//       with the distribution.
-//     * Neither the name of Google Inc. nor the names of its
-//       contributors may be used to endorse or promote products derived
-//       from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#ifndef DOUBLE_CONVERSION_FIXED_DTOA_H_
-#define DOUBLE_CONVERSION_FIXED_DTOA_H_
-
-#include "utils.h"
-
-namespace double_conversion
-{
-
-// Produces digits necessary to print a given number with
-// 'fractional_count' digits after the decimal point.
-// The buffer must be big enough to hold the result plus one terminating null
-// character.
-//
-// The produced digits might be too short, in which case the caller has to fill
-// the gaps with '0's.
-// Example: FastFixedDtoa(0.001, 5, ...) is allowed to return buffer = "1", and
-// decimal_point = -2.
-// Halfway cases are rounded towards +/-Infinity (away from 0). The call
-// FastFixedDtoa(0.15, 2, ...) thus returns buffer = "2", decimal_point = 0.
-// The returned buffer may contain digits that would be truncated from the
-// shortest representation of the input.
-//
-// This method only works for some parameters. If it can't handle the input it
-// returns false. The output is null-terminated when the function succeeds.
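A sketch of what a caller does with that contract, using the documented FastFixedDtoa(0.001, 5, ...) outcome of buffer = "1", decimal_point = -2; the helper below is hypothetical and only handles the decimal_point <= 0 case:

#include <cstdio>
#include <string>

// Hypothetical helper: render (digits, decimal_point) as a fixed-point
// string with 'fractional_count' digits after the point (values < 1 only).
static std::string format_fixed(const char * digits, int decimal_point, int fractional_count)
{
    std::string out = "0.";
    // decimal_point == -2 means two zeros between the point and the digits.
    for (int i = 0; i < -decimal_point; ++i)
        out += '0';
    out += digits;
    while ((int)out.size() - 2 < fractional_count)
        out += '0'; // pad the gap the comment above mentions
    return out;
}

int main()
{
    std::printf("%s\n", format_fixed("1", -2, 5).c_str()); // 0.00100
    return 0;
}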
-bool FastFixedDtoa(double v, int fractional_count, Vector buffer, int * length, int * decimal_point); - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_FIXED_DTOA_H_ diff --git a/base/poco/Foundation/src/gzguts.h b/base/poco/Foundation/src/gzguts.h deleted file mode 100644 index 1b964756065..00000000000 --- a/base/poco/Foundation/src/gzguts.h +++ /dev/null @@ -1,194 +0,0 @@ -/* gzguts.h -- zlib internal header definitions for gz* operations - * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#ifdef _LARGEFILE64_SOURCE -# ifndef _LARGEFILE_SOURCE -# define _LARGEFILE_SOURCE 1 -# endif -# ifdef _FILE_OFFSET_BITS -# undef _FILE_OFFSET_BITS -# endif -#endif - -#ifdef HAVE_HIDDEN -# define ZLIB_INTERNAL __attribute__((visibility("hidden"))) -#else -# define ZLIB_INTERNAL -#endif - -#include -#include "zlib.h" -#ifdef STDC -# include -# include -# include -#endif - -#ifndef _POSIX_SOURCE -# define _POSIX_SOURCE -#endif -#include - - -# if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32) -# include -# endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define WIDECHAR -#endif - -#ifdef WINAPI_FAMILY -# define open _open -# define read _read -# define write _write -# define close _close -#endif - -#ifdef NO_DEFLATE /* for compatibility with old definition */ -# define NO_GZCOMPRESS -#endif - -#if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) -# ifndef HAVE_VSNPRINTF -# define HAVE_VSNPRINTF -# endif -#endif - - -#ifndef HAVE_VSNPRINTF -# ifdef __TURBOC__ -# define NO_vsnprintf -# endif -# ifdef WIN32 -/* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */ -# if !defined(vsnprintf) && !defined(NO_vsnprintf) -# define vsnprintf _vsnprintf -# endif -# endif -# ifdef __SASC -# define NO_vsnprintf -# endif -# ifdef VMS -# define NO_vsnprintf -# endif -# ifdef __OS400__ -# define NO_vsnprintf -# endif -# ifdef __MVS__ -# define NO_vsnprintf -# endif -#endif - -/* unlike snprintf (which is required in C99), _snprintf does not guarantee - null termination of the result -- however this is only used in gzlib.c where - the result is assured to fit in the space provided */ - -#ifndef local -# define local static -#endif -/* since "static" is used to mean two completely different things in C, we - define "local" for the non-static meaning of "static", for readability - (compile with -Dlocal if your debugger can't find static symbols) */ - -/* gz* functions always use library allocation functions */ -#ifndef STDC -extern voidp malloc OF((uInt size)); -extern void free OF((voidpf ptr)); -#endif - -/* get errno and strerror definition */ -#if defined UNDER_CE -# include -# define zstrerror() gz_strwinerror((DWORD)GetLastError()) -#else -# ifndef NO_STRERROR -# include -# define zstrerror() strerror(errno) -# else -# define zstrerror() "stdio error (consult errno)" -# endif -#endif - -/* provide prototypes for these when building zlib without LFS */ -#if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE - 0 == 0 -ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); -ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); -ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); -ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); -#endif - -/* default memLevel */ -#if MAX_MEM_LEVEL >= 8 -# define DEF_MEM_LEVEL 8 -#else -# define DEF_MEM_LEVEL MAX_MEM_LEVEL -#endif - -/* default i/o buffer size -- double this for output when reading (this and - 
twice this must be able to fit in an unsigned type) */ -#define GZBUFSIZE 8192 - -/* gzip modes, also provide a little integrity check on the passed structure */ -#define GZ_NONE 0 -#define GZ_READ 7247 -#define GZ_WRITE 31153 -#define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */ - -/* values for gz_state how */ -#define LOOK 0 /* look for a gzip header */ -#define COPY 1 /* copy input directly */ -#define GZIP 2 /* decompress a gzip stream */ - -/* internal gzip file state data structure */ -typedef struct -{ - /* exposed contents for gzgetc() macro */ - struct gzFile_s x; /* "x" for exposed */ - /* x.have: number of bytes available at x.next */ - /* x.next: next output data to deliver or write */ - /* x.pos: current position in uncompressed data */ - /* used for both reading and writing */ - int mode; /* see gzip modes above */ - int fd; /* file descriptor */ - char * path; /* path or fd for error messages */ - unsigned size; /* buffer size, zero if not allocated yet */ - unsigned want; /* requested buffer size, default is GZBUFSIZE */ - unsigned char * in; /* input buffer (double-sized when writing) */ - unsigned char * out; /* output buffer (double-sized when reading) */ - int direct; /* 0 if processing gzip, 1 if transparent */ - /* just for reading */ - int how; /* 0: get header, 1: copy, 2: decompress */ - z_off64_t start; /* where the gzip data started, for rewinding */ - int eof; /* true if end of input file reached */ - int past; /* true if read requested past end */ - /* just for writing */ - int level; /* compression level */ - int strategy; /* compression strategy */ - /* seek request */ - z_off64_t skip; /* amount to skip (already rewound if backwards) */ - int seek; /* true if seek request pending */ - /* error information */ - int err; /* error code */ - char * msg; /* error message */ - /* zlib inflate or deflate stream */ - z_stream strm; /* stream structure in-place (not a pointer) */ -} gz_state; -typedef gz_state FAR * gz_statep; - -/* shared functions */ -void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *)); -#if defined UNDER_CE -char ZLIB_INTERNAL * gz_strwinerror OF((DWORD error)); -#endif - -/* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t - value -- needed when comparing unsigned to z_off64_t, which is signed - (possible z_off64_t types off_t, off64_t, and long are all signed) */ -#ifdef INT_MAX -# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX) -#else -unsigned ZLIB_INTERNAL gz_intmax OF((void)); -# define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) -#endif diff --git a/base/poco/Foundation/src/ieee.h b/base/poco/Foundation/src/ieee.h deleted file mode 100644 index 2cfd39bab66..00000000000 --- a/base/poco/Foundation/src/ieee.h +++ /dev/null @@ -1,458 +0,0 @@ -// Copyright 2012 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. 
nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_DOUBLE_H_ -#define DOUBLE_CONVERSION_DOUBLE_H_ - -#include "diy-fp.h" - -namespace double_conversion -{ - -// We assume that doubles and uint64_t have the same endianness. -static uint64_t double_to_uint64(double d) -{ - return BitCast<uint64_t>(d); -} -static double uint64_to_double(uint64_t d64) -{ - return BitCast<double>(d64); -} -static uint32_t float_to_uint32(float f) -{ - return BitCast<uint32_t>(f); -} -static float uint32_to_float(uint32_t d32) -{ - return BitCast<float>(d32); -} - -// Helper functions for doubles. -class Double -{ -public: - static const uint64_t kSignMask = UINT64_2PART_C(0x80000000, 00000000); - static const uint64_t kExponentMask = UINT64_2PART_C(0x7FF00000, 00000000); - static const uint64_t kSignificandMask = UINT64_2PART_C(0x000FFFFF, FFFFFFFF); - static const uint64_t kHiddenBit = UINT64_2PART_C(0x00100000, 00000000); - static const int kPhysicalSignificandSize = 52; // Excludes the hidden bit. - static const int kSignificandSize = 53; - - Double() : d64_(0) { } - explicit Double(double d) : d64_(double_to_uint64(d)) { } - explicit Double(uint64_t d64) : d64_(d64) { } - explicit Double(DiyFp diy_fp) : d64_(DiyFpToUint64(diy_fp)) { } - - // The value encoded by this Double must be greater or equal to +0.0. - // It must not be special (infinity, or NaN). - DiyFp AsDiyFp() const - { - ASSERT(Sign() > 0); - ASSERT(!IsSpecial()); - return DiyFp(Significand(), Exponent()); - } - - // The value encoded by this Double must be strictly greater than 0. - DiyFp AsNormalizedDiyFp() const - { - ASSERT(value() > 0.0); - uint64_t f = Significand(); - int e = Exponent(); - - // The current double could be a denormal. - while ((f & kHiddenBit) == 0) - { - f <<= 1; - e--; - } - // Do the final shifts in one go. - f <<= DiyFp::kSignificandSize - kSignificandSize; - e -= DiyFp::kSignificandSize - kSignificandSize; - return DiyFp(f, e); - } - - // Returns the double's bit as uint64. - uint64_t AsUint64() const { return d64_; } - - // Returns the next greater double. Returns +infinity on input +infinity.
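(Editorial aside: the mask constants above fix the IEEE-754 bit layout that this helper class relies on. Below is a minimal, self-contained sketch of the same decomposition, using C++20 std::bit_cast in place of the library's BitCast helper; all names in it are illustrative and nothing here is part of the deleted header.)

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        constexpr uint64_t kSignMask        = 0x8000000000000000ULL;
        constexpr uint64_t kExponentMask    = 0x7FF0000000000000ULL;
        constexpr uint64_t kSignificandMask = 0x000FFFFFFFFFFFFFULL;
        constexpr uint64_t kHiddenBit       = 0x0010000000000000ULL;

        uint64_t d64 = std::bit_cast<uint64_t>(3.14);
        int sign = (d64 & kSignMask) == 0 ? 1 : -1;
        // Subtracting the bias (0x3FF + 52, as in kExponentBias above) yields
        // the exponent of the 53-bit integer significand of a normal double.
        int exponent = (int)((d64 & kExponentMask) >> 52) - (0x3FF + 52);
        uint64_t significand = (d64 & kSignificandMask) | kHiddenBit;
        std::printf("sign=%d e=%d f=%llu\n", sign, exponent, (unsigned long long)significand);
    }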
- double NextDouble() const - { - if (d64_ == kInfinity) - return Double(kInfinity).value(); - if (Sign() < 0 && Significand() == 0) - { - // -0.0 - return 0.0; - } - if (Sign() < 0) - { - return Double(d64_ - 1).value(); - } - else - { - return Double(d64_ + 1).value(); - } - } - - double PreviousDouble() const - { - if (d64_ == (kInfinity | kSignMask)) - return -Double::Infinity(); - if (Sign() < 0) - { - return Double(d64_ + 1).value(); - } - else - { - if (Significand() == 0) - return -0.0; - return Double(d64_ - 1).value(); - } - } - - int Exponent() const - { - if (IsDenormal()) - return kDenormalExponent; - - uint64_t d64 = AsUint64(); - int biased_e = static_cast<int>((d64 & kExponentMask) >> kPhysicalSignificandSize); - return biased_e - kExponentBias; - } - - uint64_t Significand() const - { - uint64_t d64 = AsUint64(); - uint64_t significand = d64 & kSignificandMask; - if (!IsDenormal()) - { - return significand + kHiddenBit; - } - else - { - return significand; - } - } - - // Returns true if the double is a denormal. - bool IsDenormal() const - { - uint64_t d64 = AsUint64(); - return (d64 & kExponentMask) == 0; - } - - // We consider denormals not to be special. - // Hence only Infinity and NaN are special. - bool IsSpecial() const - { - uint64_t d64 = AsUint64(); - return (d64 & kExponentMask) == kExponentMask; - } - - bool IsNan() const - { - uint64_t d64 = AsUint64(); - return ((d64 & kExponentMask) == kExponentMask) && ((d64 & kSignificandMask) != 0); - } - - bool IsInfinite() const - { - uint64_t d64 = AsUint64(); - return ((d64 & kExponentMask) == kExponentMask) && ((d64 & kSignificandMask) == 0); - } - - int Sign() const - { - uint64_t d64 = AsUint64(); - return (d64 & kSignMask) == 0 ? 1 : -1; - } - - // Precondition: the value encoded by this Double must be greater or equal - // than +0.0. - DiyFp UpperBoundary() const - { - ASSERT(Sign() > 0); - return DiyFp(Significand() * 2 + 1, Exponent() - 1); - } - - // Computes the two boundaries of this. - // The bigger boundary (m_plus) is normalized. The lower boundary has the same - // exponent as m_plus. - // Precondition: the value encoded by this Double must be greater than 0. - void NormalizedBoundaries(DiyFp * out_m_minus, DiyFp * out_m_plus) const - { - ASSERT(value() > 0.0); - DiyFp v = this->AsDiyFp(); - DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1)); - DiyFp m_minus; - if (LowerBoundaryIsCloser()) - { - m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2); - } - else - { - m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1); - } - m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e())); - m_minus.set_e(m_plus.e()); - *out_m_plus = m_plus; - *out_m_minus = m_minus; - } - - bool LowerBoundaryIsCloser() const - { - // The boundary is closer if the significand is of the form f == 2^p-1 then - // the lower boundary is closer. - // Think of v = 1000e10 and v- = 9999e9. - // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but - // at a distance of 1e8. - // The only exception is for the smallest normal: the largest denormal is - // at the same distance as its successor. - // Note: denormals have the same exponent as the smallest normals. - bool physical_significand_is_zero = ((AsUint64() & kSignificandMask) == 0); - return physical_significand_is_zero && (Exponent() != kDenormalExponent); - } - - double value() const { return uint64_to_double(d64_); } - - // Returns the significand size for a given order of magnitude. - // If v = f*2^e with 2^p-1 <= f <= 2^p then p+e is v's order of magnitude.
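(Editorial aside: NextDouble() and PreviousDouble() above work because finite IEEE doubles of the same sign are adjacent exactly when their bit patterns are adjacent. A tiny standalone check of that property against std::nextafter, assuming C++20 for std::bit_cast; not part of the deleted header.)

    #include <bit>
    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <limits>

    int main()
    {
        double v = 1.5;
        // Adding one to the bit pattern of a positive finite double produces
        // the next representable value, which is what NextDouble() returns.
        double next = std::bit_cast<double>(std::bit_cast<uint64_t>(v) + 1);
        assert(next == std::nextafter(v, std::numeric_limits<double>::infinity()));
    }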
- // This function returns the number of significant binary digits v will have - // once it's encoded into a double. In almost all cases this is equal to - // kSignificandSize. The only exceptions are denormals. They start with - // leading zeroes and their effective significand-size is hence smaller. - static int SignificandSizeForOrderOfMagnitude(int order) - { - if (order >= (kDenormalExponent + kSignificandSize)) - { - return kSignificandSize; - } - if (order <= kDenormalExponent) - return 0; - return order - kDenormalExponent; - } - - static double Infinity() { return Double(kInfinity).value(); } - - static double NaN() { return Double(kNaN).value(); } - -private: - static const int kExponentBias = 0x3FF + kPhysicalSignificandSize; - static const int kDenormalExponent = -kExponentBias + 1; - static const int kMaxExponent = 0x7FF - kExponentBias; - static const uint64_t kInfinity = UINT64_2PART_C(0x7FF00000, 00000000); - static const uint64_t kNaN = UINT64_2PART_C(0x7FF80000, 00000000); - - const uint64_t d64_; - - static uint64_t DiyFpToUint64(DiyFp diy_fp) - { - uint64_t significand = diy_fp.f(); - int exponent = diy_fp.e(); - while (significand > kHiddenBit + kSignificandMask) - { - significand >>= 1; - exponent++; - } - if (exponent >= kMaxExponent) - { - return kInfinity; - } - if (exponent < kDenormalExponent) - { - return 0; - } - while (exponent > kDenormalExponent && (significand & kHiddenBit) == 0) - { - significand <<= 1; - exponent--; - } - uint64_t biased_exponent; - if (exponent == kDenormalExponent && (significand & kHiddenBit) == 0) - { - biased_exponent = 0; - } - else - { - biased_exponent = static_cast<uint64_t>(exponent + kExponentBias); - } - return (significand & kSignificandMask) | (biased_exponent << kPhysicalSignificandSize); - } - - DISALLOW_COPY_AND_ASSIGN(Double); -}; - -class Single -{ -public: - static const uint32_t kSignMask = 0x80000000; - static const uint32_t kExponentMask = 0x7F800000; - static const uint32_t kSignificandMask = 0x007FFFFF; - static const uint32_t kHiddenBit = 0x00800000; - static const int kPhysicalSignificandSize = 23; // Excludes the hidden bit. - static const int kSignificandSize = 24; - - Single() : d32_(0) { } - explicit Single(float f) : d32_(float_to_uint32(f)) { } - explicit Single(uint32_t d32) : d32_(d32) { } - - // The value encoded by this Single must be greater or equal to +0.0. - // It must not be special (infinity, or NaN). - DiyFp AsDiyFp() const - { - ASSERT(Sign() > 0); - ASSERT(!IsSpecial()); - return DiyFp(Significand(), Exponent()); - } - - // Returns the single's bit as uint32. - uint32_t AsUint32() const { return d32_; } - - int Exponent() const - { - if (IsDenormal()) - return kDenormalExponent; - - uint32_t d32 = AsUint32(); - int biased_e = static_cast<int>((d32 & kExponentMask) >> kPhysicalSignificandSize); - return biased_e - kExponentBias; - } - - uint32_t Significand() const - { - uint32_t d32 = AsUint32(); - uint32_t significand = d32 & kSignificandMask; - if (!IsDenormal()) - { - return significand + kHiddenBit; - } - else - { - return significand; - } - } - - // Returns true if the single is a denormal. - bool IsDenormal() const - { - uint32_t d32 = AsUint32(); - return (d32 & kExponentMask) == 0; - } - - // We consider denormals not to be special. - // Hence only Infinity and NaN are special.
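(Editorial aside: the IsSpecial()/IsNan()/IsInfinite() tests that follow all reduce to an all-ones-exponent check. The same test for float as a standalone sketch; is_special is an invented name and C++20 std::bit_cast is assumed.)

    #include <bit>
    #include <cstdint>
    #include <cstdio>
    #include <limits>

    // All-ones exponent bits mark a special value; a zero significand then
    // means infinity and a non-zero one means NaN, as Single checks below.
    static bool is_special(float f)
    {
        constexpr uint32_t kExponentMask = 0x7F800000;
        return (std::bit_cast<uint32_t>(f) & kExponentMask) == kExponentMask;
    }

    int main()
    {
        std::printf("%d %d %d\n",
                    is_special(1.0f),
                    is_special(std::numeric_limits<float>::infinity()),
                    is_special(std::numeric_limits<float>::quiet_NaN()));
    }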
- bool IsSpecial() const - { - uint32_t d32 = AsUint32(); - return (d32 & kExponentMask) == kExponentMask; - } - - bool IsNan() const - { - uint32_t d32 = AsUint32(); - return ((d32 & kExponentMask) == kExponentMask) && ((d32 & kSignificandMask) != 0); - } - - bool IsInfinite() const - { - uint32_t d32 = AsUint32(); - return ((d32 & kExponentMask) == kExponentMask) && ((d32 & kSignificandMask) == 0); - } - - int Sign() const - { - uint32_t d32 = AsUint32(); - return (d32 & kSignMask) == 0 ? 1 : -1; - } - - // Computes the two boundaries of this. - // The bigger boundary (m_plus) is normalized. The lower boundary has the same - // exponent as m_plus. - // Precondition: the value encoded by this Single must be greater than 0. - void NormalizedBoundaries(DiyFp * out_m_minus, DiyFp * out_m_plus) const - { - ASSERT(value() > 0.0); - DiyFp v = this->AsDiyFp(); - DiyFp m_plus = DiyFp::Normalize(DiyFp((v.f() << 1) + 1, v.e() - 1)); - DiyFp m_minus; - if (LowerBoundaryIsCloser()) - { - m_minus = DiyFp((v.f() << 2) - 1, v.e() - 2); - } - else - { - m_minus = DiyFp((v.f() << 1) - 1, v.e() - 1); - } - m_minus.set_f(m_minus.f() << (m_minus.e() - m_plus.e())); - m_minus.set_e(m_plus.e()); - *out_m_plus = m_plus; - *out_m_minus = m_minus; - } - - // Precondition: the value encoded by this Single must be greater or equal - // than +0.0. - DiyFp UpperBoundary() const - { - ASSERT(Sign() > 0); - return DiyFp(Significand() * 2 + 1, Exponent() - 1); - } - - bool LowerBoundaryIsCloser() const - { - // The boundary is closer if the significand is of the form f == 2^p-1 then - // the lower boundary is closer. - // Think of v = 1000e10 and v- = 9999e9. - // Then the boundary (== (v - v-)/2) is not just at a distance of 1e9 but - // at a distance of 1e8. - // The only exception is for the smallest normal: the largest denormal is - // at the same distance as its successor. - // Note: denormals have the same exponent as the smallest normals. - bool physical_significand_is_zero = ((AsUint32() & kSignificandMask) == 0); - return physical_significand_is_zero && (Exponent() != kDenormalExponent); - } - - float value() const { return uint32_to_float(d32_); } - - static float Infinity() { return Single(kInfinity).value(); } - - static float NaN() { return Single(kNaN).value(); } - -private: - static const int kExponentBias = 0x7F + kPhysicalSignificandSize; - static const int kDenormalExponent = -kExponentBias + 1; - static const int kMaxExponent = 0xFF - kExponentBias; - static const uint32_t kInfinity = 0x7F800000; - static const uint32_t kNaN = 0x7FC00000; - - const uint32_t d32_; - - DISALLOW_COPY_AND_ASSIGN(Single); -}; - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_DOUBLE_H_ diff --git a/base/poco/Foundation/src/infback.c b/base/poco/Foundation/src/infback.c deleted file mode 100644 index 59679ecbfc5..00000000000 --- a/base/poco/Foundation/src/infback.c +++ /dev/null @@ -1,640 +0,0 @@ -/* infback.c -- inflate using a call-back interface - * Copyright (C) 1995-2016 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* - This code is largely copied from inflate.c. Normally either infback.o or - inflate.o would be linked into an application--not both. The interface - with inffast.c is retained so that optimized assembler-coded versions of - inflate_fast() can be used with either inflate.c or infback.c. 
- */ - -#include "zutil.h" -#include "inftrees.h" -#include "inflate.h" -#include "inffast.h" - -/* function prototypes */ -local void fixedtables OF((struct inflate_state FAR *state)); - -/* - strm provides memory allocation functions in zalloc and zfree, or - Z_NULL to use the library memory allocation functions. - - windowBits is in the range 8..15, and window is a user-supplied - window and output buffer that is 2**windowBits bytes. - */ -int ZEXPORT inflateBackInit_(strm, windowBits, window, version, stream_size) -z_streamp strm; -int windowBits; -unsigned char FAR *window; -const char *version; -int stream_size; -{ - struct inflate_state FAR *state; - - if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || - stream_size != (int)(sizeof(z_stream))) - return Z_VERSION_ERROR; - if (strm == Z_NULL || window == Z_NULL || - windowBits < 8 || windowBits > 15) - return Z_STREAM_ERROR; - strm->msg = Z_NULL; /* in case we return an error */ - if (strm->zalloc == (alloc_func)0) { -#ifdef Z_SOLO - return Z_STREAM_ERROR; -#else - strm->zalloc = zcalloc; - strm->opaque = (voidpf)0; -#endif - } - if (strm->zfree == (free_func)0) -#ifdef Z_SOLO - return Z_STREAM_ERROR; -#else - strm->zfree = zcfree; -#endif - state = (struct inflate_state FAR *)ZALLOC(strm, 1, - sizeof(struct inflate_state)); - if (state == Z_NULL) return Z_MEM_ERROR; - Tracev((stderr, "inflate: allocated\n")); - strm->state = (struct internal_state FAR *)state; - state->dmax = 32768U; - state->wbits = (uInt)windowBits; - state->wsize = 1U << windowBits; - state->window = window; - state->wnext = 0; - state->whave = 0; - return Z_OK; -} - -/* - Return state with length and distance decoding tables and index sizes set to - fixed code decoding. Normally this returns fixed tables from inffixed.h. - If BUILDFIXED is defined, then instead this routine builds the tables the - first time it's called, and returns those tables the first time and - thereafter. This reduces the size of the code by about 2K bytes, in - exchange for a little execution time. However, BUILDFIXED should not be - used for threaded applications, since the rewriting of the tables and virgin - may not be thread-safe. 
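(Editorial aside, not part of zlib: in C++ the build-once requirement described above can be made thread-safe with std::call_once; build_tables below is a hypothetical stand-in for the construction that fixedtables() performs.)

    #include <mutex>

    static std::once_flag fixed_tables_once;

    void ensure_fixed_tables()
    {
        // Exactly one thread runs the builder; the others block until it is done.
        std::call_once(fixed_tables_once, [] {
            // build_tables();  // hypothetical: fill lenfix/distfix once
        });
    }

zlib's own answer, as the comment says, is simply not to use BUILDFIXED in threaded programs.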
- */ -local void fixedtables(state) -struct inflate_state FAR *state; -{ -#ifdef BUILDFIXED - static int virgin = 1; - static code *lenfix, *distfix; - static code fixed[544]; - - /* build fixed huffman tables if first call (may not be thread safe) */ - if (virgin) { - unsigned sym, bits; - static code *next; - - /* literal/length table */ - sym = 0; - while (sym < 144) state->lens[sym++] = 8; - while (sym < 256) state->lens[sym++] = 9; - while (sym < 280) state->lens[sym++] = 7; - while (sym < 288) state->lens[sym++] = 8; - next = fixed; - lenfix = next; - bits = 9; - inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work); - - /* distance table */ - sym = 0; - while (sym < 32) state->lens[sym++] = 5; - distfix = next; - bits = 5; - inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work); - - /* do this just once */ - virgin = 0; - } -#else /* !BUILDFIXED */ -# include "inffixed.h" -#endif /* BUILDFIXED */ - state->lencode = lenfix; - state->lenbits = 9; - state->distcode = distfix; - state->distbits = 5; -} - -/* Macros for inflateBack(): */ - -/* Load returned state from inflate_fast() */ -#define LOAD() \ - do { \ - put = strm->next_out; \ - left = strm->avail_out; \ - next = strm->next_in; \ - have = strm->avail_in; \ - hold = state->hold; \ - bits = state->bits; \ - } while (0) - -/* Set state from registers for inflate_fast() */ -#define RESTORE() \ - do { \ - strm->next_out = put; \ - strm->avail_out = left; \ - strm->next_in = next; \ - strm->avail_in = have; \ - state->hold = hold; \ - state->bits = bits; \ - } while (0) - -/* Clear the input bit accumulator */ -#define INITBITS() \ - do { \ - hold = 0; \ - bits = 0; \ - } while (0) - -/* Assure that some input is available. If input is requested, but denied, - then return a Z_BUF_ERROR from inflateBack(). */ -#define PULL() \ - do { \ - if (have == 0) { \ - have = in(in_desc, &next); \ - if (have == 0) { \ - next = Z_NULL; \ - ret = Z_BUF_ERROR; \ - goto inf_leave; \ - } \ - } \ - } while (0) - -/* Get a byte of input into the bit accumulator, or return from inflateBack() - with an error if there is no input available. */ -#define PULLBYTE() \ - do { \ - PULL(); \ - have--; \ - hold += (unsigned long)(*next++) << bits; \ - bits += 8; \ - } while (0) - -/* Assure that there are at least n bits in the bit accumulator. If there is - not enough available input to do that, then return from inflateBack() with - an error. */ -#define NEEDBITS(n) \ - do { \ - while (bits < (unsigned)(n)) \ - PULLBYTE(); \ - } while (0) - -/* Return the low n bits of the bit accumulator (n < 16) */ -#define BITS(n) \ - ((unsigned)hold & ((1U << (n)) - 1)) - -/* Remove n bits from the bit accumulator */ -#define DROPBITS(n) \ - do { \ - hold >>= (n); \ - bits -= (unsigned)(n); \ - } while (0) - -/* Remove zero to seven bits as needed to go to a byte boundary */ -#define BYTEBITS() \ - do { \ - hold >>= bits & 7; \ - bits -= bits & 7; \ - } while (0) - -/* Assure that some output space is available, by writing out the window - if it's full. If the write fails, return from inflateBack() with a - Z_BUF_ERROR. */ -#define ROOM() \ - do { \ - if (left == 0) { \ - put = state->window; \ - left = state->wsize; \ - state->whave = left; \ - if (out(out_desc, put, left)) { \ - ret = Z_BUF_ERROR; \ - goto inf_leave; \ - } \ - } \ - } while (0) - -/* - strm provides the memory allocation functions and window buffer on input, - and provides information on the unused input on return. 
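(Editorial aside, not part of zlib: a minimal, hypothetical in()/out() pair for this interface might look like the sketch below; file_desc, read_cb and write_cb are invented names, and the contract they must satisfy is spelled out in the remainder of this comment.)

    #include <stdio.h>
    #include "zlib.h"

    typedef struct { FILE * file; unsigned char buf[16384]; } file_desc;

    static unsigned read_cb(void * desc, z_const unsigned char ** next)
    {
        file_desc * d = (file_desc *)desc;
        *next = d->buf;
        return (unsigned)fread(d->buf, 1, sizeof(d->buf), d->file);  // 0 signals failure
    }

    static int write_cb(void * desc, unsigned char * data, unsigned len)
    {
        file_desc * d = (file_desc *)desc;
        return fwrite(data, 1, len, d->file) != len;  // non-zero signals failure
    }

A caller would pass these as inflateBack(&strm, read_cb, &src, write_cb, &dst) after initializing strm with a 32K window via inflateBackInit().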
For Z_DATA_ERROR - returns, strm will also provide an error message. - - in() and out() are the call-back input and output functions. When - inflateBack() needs more input, it calls in(). When inflateBack() has - filled the window with output, or when it completes with data in the - window, it calls out() to write out the data. The application must not - change the provided input until in() is called again or inflateBack() - returns. The application must not change the window/output buffer until - inflateBack() returns. - - in() and out() are called with a descriptor parameter provided in the - inflateBack() call. This parameter can be a structure that provides the - information required to do the read or write, as well as accumulated - information on the input and output such as totals and check values. - - in() should return zero on failure. out() should return non-zero on - failure. If either in() or out() fails, then inflateBack() returns a - Z_BUF_ERROR. strm->next_in can be checked for Z_NULL to see whether it - was in() or out() that caused the error. Otherwise, inflateBack() - returns Z_STREAM_END on success, Z_DATA_ERROR for a deflate format - error, or Z_MEM_ERROR if it could not allocate memory for the state. - inflateBack() can also return Z_STREAM_ERROR if the input parameters - are not correct, i.e. strm is Z_NULL or the state was not initialized. - */ -int ZEXPORT inflateBack(strm, in, in_desc, out, out_desc) -z_streamp strm; -in_func in; -void FAR *in_desc; -out_func out; -void FAR *out_desc; -{ - struct inflate_state FAR *state; - z_const unsigned char FAR *next; /* next input */ - unsigned char FAR *put; /* next output */ - unsigned have, left; /* available input and output */ - unsigned long hold; /* bit buffer */ - unsigned bits; /* bits in bit buffer */ - unsigned copy; /* number of stored or match bytes to copy */ - unsigned char FAR *from; /* where to copy match bytes from */ - code here; /* current decoding table entry */ - code last; /* parent table entry */ - unsigned len; /* length to copy for repeats, bits to drop */ - int ret; /* return code */ - static const unsigned short order[19] = /* permutation of code lengths */ - {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - - /* Check that the strm exists and that the state was initialized */ - if (strm == Z_NULL || strm->state == Z_NULL) - return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - - /* Reset the state */ - strm->msg = Z_NULL; - state->mode = TYPE; - state->last = 0; - state->whave = 0; - next = strm->next_in; - have = next != Z_NULL ? strm->avail_in : 0; - hold = 0; - bits = 0; - put = state->window; - left = state->wsize; - - /* Inflate until end of block marked as last */ - for (;;) - switch (state->mode) { - case TYPE: - /* determine and dispatch block type */ - if (state->last) { - BYTEBITS(); - state->mode = DONE; - break; - } - NEEDBITS(3); - state->last = BITS(1); - DROPBITS(1); - switch (BITS(2)) { - case 0: /* stored block */ - Tracev((stderr, "inflate: stored block%s\n", - state->last ? " (last)" : "")); - state->mode = STORED; - break; - case 1: /* fixed block */ - fixedtables(state); - Tracev((stderr, "inflate: fixed codes block%s\n", - state->last ? " (last)" : "")); - state->mode = LEN; /* decode codes */ - break; - case 2: /* dynamic block */ - Tracev((stderr, "inflate: dynamic codes block%s\n", - state->last ? 
" (last)" : "")); - state->mode = TABLE; - break; - case 3: - strm->msg = (char *)"invalid block type"; - state->mode = BAD; - } - DROPBITS(2); - break; - - case STORED: - /* get and verify stored block length */ - BYTEBITS(); /* go to byte boundary */ - NEEDBITS(32); - if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { - strm->msg = (char *)"invalid stored block lengths"; - state->mode = BAD; - break; - } - state->length = (unsigned)hold & 0xffff; - Tracev((stderr, "inflate: stored length %u\n", - state->length)); - INITBITS(); - - /* copy stored block from input to output */ - while (state->length != 0) { - copy = state->length; - PULL(); - ROOM(); - if (copy > have) copy = have; - if (copy > left) copy = left; - zmemcpy(put, next, copy); - have -= copy; - next += copy; - left -= copy; - put += copy; - state->length -= copy; - } - Tracev((stderr, "inflate: stored end\n")); - state->mode = TYPE; - break; - - case TABLE: - /* get dynamic table entries descriptor */ - NEEDBITS(14); - state->nlen = BITS(5) + 257; - DROPBITS(5); - state->ndist = BITS(5) + 1; - DROPBITS(5); - state->ncode = BITS(4) + 4; - DROPBITS(4); -#ifndef PKZIP_BUG_WORKAROUND - if (state->nlen > 286 || state->ndist > 30) { - strm->msg = (char *)"too many length or distance symbols"; - state->mode = BAD; - break; - } -#endif - Tracev((stderr, "inflate: table sizes ok\n")); - - /* get code length code lengths (not a typo) */ - state->have = 0; - while (state->have < state->ncode) { - NEEDBITS(3); - state->lens[order[state->have++]] = (unsigned short)BITS(3); - DROPBITS(3); - } - while (state->have < 19) - state->lens[order[state->have++]] = 0; - state->next = state->codes; - state->lencode = (code const FAR *)(state->next); - state->lenbits = 7; - ret = inflate_table(CODES, state->lens, 19, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid code lengths set"; - state->mode = BAD; - break; - } - Tracev((stderr, "inflate: code lengths ok\n")); - - /* get length and distance code code lengths */ - state->have = 0; - while (state->have < state->nlen + state->ndist) { - for (;;) { - here = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if (here.val < 16) { - DROPBITS(here.bits); - state->lens[state->have++] = here.val; - } - else { - if (here.val == 16) { - NEEDBITS(here.bits + 2); - DROPBITS(here.bits); - if (state->have == 0) { - strm->msg = (char *)"invalid bit length repeat"; - state->mode = BAD; - break; - } - len = (unsigned)(state->lens[state->have - 1]); - copy = 3 + BITS(2); - DROPBITS(2); - } - else if (here.val == 17) { - NEEDBITS(here.bits + 3); - DROPBITS(here.bits); - len = 0; - copy = 3 + BITS(3); - DROPBITS(3); - } - else { - NEEDBITS(here.bits + 7); - DROPBITS(here.bits); - len = 0; - copy = 11 + BITS(7); - DROPBITS(7); - } - if (state->have + copy > state->nlen + state->ndist) { - strm->msg = (char *)"invalid bit length repeat"; - state->mode = BAD; - break; - } - while (copy--) - state->lens[state->have++] = (unsigned short)len; - } - } - - /* handle error breaks in while */ - if (state->mode == BAD) break; - - /* check for end-of-block code (better have one) */ - if (state->lens[256] == 0) { - strm->msg = (char *)"invalid code -- missing end-of-block"; - state->mode = BAD; - break; - } - - /* build code tables -- note: do not change the lenbits or distbits - values here (9 and 6) without reading the comments in inftrees.h - concerning the ENOUGH constants, which depend on those values */ - state->next = 
state->codes; - state->lencode = (code const FAR *)(state->next); - state->lenbits = 9; - ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid literal/lengths set"; - state->mode = BAD; - break; - } - state->distcode = (code const FAR *)(state->next); - state->distbits = 6; - ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, - &(state->next), &(state->distbits), state->work); - if (ret) { - strm->msg = (char *)"invalid distances set"; - state->mode = BAD; - break; - } - Tracev((stderr, "inflate: codes ok\n")); - state->mode = LEN; - - case LEN: - /* use inflate_fast() if we have enough input and output */ - if (have >= 6 && left >= 258) { - RESTORE(); - if (state->whave < state->wsize) - state->whave = state->wsize - left; - inflate_fast(strm, state->wsize); - LOAD(); - break; - } - - /* get a literal, length, or end-of-block code */ - for (;;) { - here = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if (here.op && (here.op & 0xf0) == 0) { - last = here; - for (;;) { - here = state->lencode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + here.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - } - DROPBITS(here.bits); - state->length = (unsigned)here.val; - - /* process literal */ - if (here.op == 0) { - Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? - "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", here.val)); - ROOM(); - *put++ = (unsigned char)(state->length); - left--; - state->mode = LEN; - break; - } - - /* process end of block */ - if (here.op & 32) { - Tracevv((stderr, "inflate: end of block\n")); - state->mode = TYPE; - break; - } - - /* invalid code */ - if (here.op & 64) { - strm->msg = (char *)"invalid literal/length code"; - state->mode = BAD; - break; - } - - /* length code -- get extra bits, if any */ - state->extra = (unsigned)(here.op) & 15; - if (state->extra != 0) { - NEEDBITS(state->extra); - state->length += BITS(state->extra); - DROPBITS(state->extra); - } - Tracevv((stderr, "inflate: length %u\n", state->length)); - - /* get distance code */ - for (;;) { - here = state->distcode[BITS(state->distbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if ((here.op & 0xf0) == 0) { - last = here; - for (;;) { - here = state->distcode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + here.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - } - DROPBITS(here.bits); - if (here.op & 64) { - strm->msg = (char *)"invalid distance code"; - state->mode = BAD; - break; - } - state->offset = (unsigned)here.val; - - /* get distance extra bits, if any */ - state->extra = (unsigned)(here.op) & 15; - if (state->extra != 0) { - NEEDBITS(state->extra); - state->offset += BITS(state->extra); - DROPBITS(state->extra); - } - if (state->offset > state->wsize - (state->whave < state->wsize ? 
- left : 0)) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } - Tracevv((stderr, "inflate: distance %u\n", state->offset)); - - /* copy match from window to output */ - do { - ROOM(); - copy = state->wsize - state->offset; - if (copy < left) { - from = put + copy; - copy = left - copy; - } - else { - from = put - state->offset; - copy = left; - } - if (copy > state->length) copy = state->length; - state->length -= copy; - left -= copy; - do { - *put++ = *from++; - } while (--copy); - } while (state->length != 0); - break; - - case DONE: - /* inflate stream terminated properly -- write leftover output */ - ret = Z_STREAM_END; - if (left < state->wsize) { - if (out(out_desc, state->window, state->wsize - left)) - ret = Z_BUF_ERROR; - } - goto inf_leave; - - case BAD: - ret = Z_DATA_ERROR; - goto inf_leave; - - default: /* can't happen, but makes compilers happy */ - ret = Z_STREAM_ERROR; - goto inf_leave; - } - - /* Return unused input */ - inf_leave: - strm->next_in = next; - strm->avail_in = have; - return ret; -} - -int ZEXPORT inflateBackEnd(strm) -z_streamp strm; -{ - if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0) - return Z_STREAM_ERROR; - ZFREE(strm, strm->state); - strm->state = Z_NULL; - Tracev((stderr, "inflate: end\n")); - return Z_OK; -} diff --git a/base/poco/Foundation/src/inffast.c b/base/poco/Foundation/src/inffast.c deleted file mode 100644 index 0dbd1dbc09f..00000000000 --- a/base/poco/Foundation/src/inffast.c +++ /dev/null @@ -1,323 +0,0 @@ -/* inffast.c -- fast decoding - * Copyright (C) 1995-2017 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#include "zutil.h" -#include "inftrees.h" -#include "inflate.h" -#include "inffast.h" - -#ifdef ASMINF -# pragma message("Assembler code may have bugs -- use at your own risk") -#else - -/* - Decode literal, length, and distance codes and write out the resulting - literal and match bytes until either not enough input or output is - available, an end-of-block is encountered, or a data error is encountered. - When large enough input and output buffers are supplied to inflate(), for - example, a 16K input buffer and a 64K output buffer, more than 95% of the - inflate execution time is spent in this routine. - - Entry assumptions: - - state->mode == LEN - strm->avail_in >= 6 - strm->avail_out >= 258 - start >= strm->avail_out - state->bits < 8 - - On return, state->mode is one of: - - LEN -- ran out of enough output space or enough available input - TYPE -- reached end of block code, inflate() to interpret next block - BAD -- error in block data - - Notes: - - - The maximum input bits used by a length/distance pair is 15 bits for the - length code, 5 bits for the length extra, 15 bits for the distance code, - and 13 bits for the distance extra. This totals 48 bits, or six bytes. - Therefore if strm->avail_in >= 6, then there is enough input to avoid - checking for available input while decoding. - - - The maximum bytes that a single length/distance pair can output is 258 - bytes, which is the maximum length that can be coded. inflate_fast() - requires strm->avail_out >= 258 for each loop to avoid checking for - output space. 
- */ -void ZLIB_INTERNAL inflate_fast(strm, start) -z_streamp strm; -unsigned start; /* inflate()'s starting value for strm->avail_out */ -{ - struct inflate_state FAR *state; - z_const unsigned char FAR *in; /* local strm->next_in */ - z_const unsigned char FAR *last; /* have enough input while in < last */ - unsigned char FAR *out; /* local strm->next_out */ - unsigned char FAR *beg; /* inflate()'s initial strm->next_out */ - unsigned char FAR *end; /* while out < end, enough space available */ -#ifdef INFLATE_STRICT - unsigned dmax; /* maximum distance from zlib header */ -#endif - unsigned wsize; /* window size or zero if not using window */ - unsigned whave; /* valid bytes in the window */ - unsigned wnext; /* window write index */ - unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */ - unsigned long hold; /* local strm->hold */ - unsigned bits; /* local strm->bits */ - code const FAR *lcode; /* local strm->lencode */ - code const FAR *dcode; /* local strm->distcode */ - unsigned lmask; /* mask for first level of length codes */ - unsigned dmask; /* mask for first level of distance codes */ - code here; /* retrieved table entry */ - unsigned op; /* code bits, operation, extra bits, or */ - /* window position, window bytes to copy */ - unsigned len; /* match length, unused bytes */ - unsigned dist; /* match distance */ - unsigned char FAR *from; /* where to copy match from */ - - /* copy state to local variables */ - state = (struct inflate_state FAR *)strm->state; - in = strm->next_in; - last = in + (strm->avail_in - 5); - out = strm->next_out; - beg = out - (start - strm->avail_out); - end = out + (strm->avail_out - 257); -#ifdef INFLATE_STRICT - dmax = state->dmax; -#endif - wsize = state->wsize; - whave = state->whave; - wnext = state->wnext; - window = state->window; - hold = state->hold; - bits = state->bits; - lcode = state->lencode; - dcode = state->distcode; - lmask = (1U << state->lenbits) - 1; - dmask = (1U << state->distbits) - 1; - - /* decode literals and length/distances until end-of-block or not enough - input data or output space */ - do { - if (bits < 15) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - here = lcode[hold & lmask]; - dolen: - op = (unsigned)(here.bits); - hold >>= op; - bits -= op; - op = (unsigned)(here.op); - if (op == 0) { /* literal */ - Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? 
- "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", here.val)); - *out++ = (unsigned char)(here.val); - } - else if (op & 16) { /* length base */ - len = (unsigned)(here.val); - op &= 15; /* number of extra bits */ - if (op) { - if (bits < op) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - len += (unsigned)hold & ((1U << op) - 1); - hold >>= op; - bits -= op; - } - Tracevv((stderr, "inflate: length %u\n", len)); - if (bits < 15) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - here = dcode[hold & dmask]; - dodist: - op = (unsigned)(here.bits); - hold >>= op; - bits -= op; - op = (unsigned)(here.op); - if (op & 16) { /* distance base */ - dist = (unsigned)(here.val); - op &= 15; /* number of extra bits */ - if (bits < op) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - if (bits < op) { - hold += (unsigned long)(*in++) << bits; - bits += 8; - } - } - dist += (unsigned)hold & ((1U << op) - 1); -#ifdef INFLATE_STRICT - if (dist > dmax) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#endif - hold >>= op; - bits -= op; - Tracevv((stderr, "inflate: distance %u\n", dist)); - op = (unsigned)(out - beg); /* max distance in output */ - if (dist > op) { /* see if copy from window */ - op = dist - op; /* distance back in window */ - if (op > whave) { - if (state->sane) { - strm->msg = - (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR - if (len <= op - whave) { - do { - *out++ = 0; - } while (--len); - continue; - } - len -= op - whave; - do { - *out++ = 0; - } while (--op > whave); - if (op == 0) { - from = out - dist; - do { - *out++ = *from++; - } while (--len); - continue; - } -#endif - } - from = window; - if (wnext == 0) { /* very common case */ - from += wsize - op; - if (op < len) { /* some from window */ - len -= op; - do { - *out++ = *from++; - } while (--op); - from = out - dist; /* rest from output */ - } - } - else if (wnext < op) { /* wrap around window */ - from += wsize + wnext - op; - op -= wnext; - if (op < len) { /* some from end of window */ - len -= op; - do { - *out++ = *from++; - } while (--op); - from = window; - if (wnext < len) { /* some from start of window */ - op = wnext; - len -= op; - do { - *out++ = *from++; - } while (--op); - from = out - dist; /* rest from output */ - } - } - } - else { /* contiguous in window */ - from += wnext - op; - if (op < len) { /* some from window */ - len -= op; - do { - *out++ = *from++; - } while (--op); - from = out - dist; /* rest from output */ - } - } - while (len > 2) { - *out++ = *from++; - *out++ = *from++; - *out++ = *from++; - len -= 3; - } - if (len) { - *out++ = *from++; - if (len > 1) - *out++ = *from++; - } - } - else { - from = out - dist; /* copy direct from output */ - do { /* minimum length is three */ - *out++ = *from++; - *out++ = *from++; - *out++ = *from++; - len -= 3; - } while (len > 2); - if (len) { - *out++ = *from++; - if (len > 1) - *out++ = *from++; - } - } - } - else if ((op & 64) == 0) { /* 2nd level distance code */ - here = dcode[here.val + (hold & ((1U << op) - 1))]; - goto dodist; - } - else { - strm->msg = (char *)"invalid distance code"; - state->mode = BAD; - break; - } - } - else if ((op & 64) == 0) { /* 2nd level length code */ - here = lcode[here.val + (hold & ((1U << op) - 1))]; - goto dolen; - } - else if (op & 32) { /* end-of-block */ - Tracevv((stderr, "inflate: end of 
block\n")); - state->mode = TYPE; - break; - } - else { - strm->msg = (char *)"invalid literal/length code"; - state->mode = BAD; - break; - } - } while (in < last && out < end); - - /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ - len = bits >> 3; - in -= len; - bits -= len << 3; - hold &= (1U << bits) - 1; - - /* update state and return */ - strm->next_in = in; - strm->next_out = out; - strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last)); - strm->avail_out = (unsigned)(out < end ? - 257 + (end - out) : 257 - (out - end)); - state->hold = hold; - state->bits = bits; - return; -} - -/* - inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe): - - Using bit fields for code structure - - Different op definition to avoid & for extra bits (do & for table bits) - - Three separate decoding do-loops for direct, window, and wnext == 0 - - Special case for distance > 1 copies to do overlapped load and store copy - - Explicit branch predictions (based on measured branch probabilities) - - Deferring match copy and interspersed it with decoding subsequent codes - - Swapping literal/length else - - Swapping window/direct else - - Larger unrolled copy loops (three is about right) - - Moving len -= 3 statement into middle of loop - */ - -#endif /* !ASMINF */ diff --git a/base/poco/Foundation/src/inffast.h b/base/poco/Foundation/src/inffast.h deleted file mode 100644 index e5c1aa4ca8c..00000000000 --- a/base/poco/Foundation/src/inffast.h +++ /dev/null @@ -1,11 +0,0 @@ -/* inffast.h -- header to use inffast.c - * Copyright (C) 1995-2003, 2010 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); diff --git a/base/poco/Foundation/src/inffixed.h b/base/poco/Foundation/src/inffixed.h deleted file mode 100644 index 4a39de52ca2..00000000000 --- a/base/poco/Foundation/src/inffixed.h +++ /dev/null @@ -1,68 +0,0 @@ -/* inffixed.h -- table for decoding fixed codes - * Generated automatically by makefixed(). - */ - -/* WARNING: this file should *not* be used by applications. - It is part of the implementation of this library and is - subject to change. Applications should only use zlib.h. 
- */ - -static const code lenfix[512] = { - {96, 7, 0}, {0, 8, 80}, {0, 8, 16}, {20, 8, 115}, {18, 7, 31}, {0, 8, 112}, {0, 8, 48}, {0, 9, 192}, {16, 7, 10}, {0, 8, 96}, - {0, 8, 32}, {0, 9, 160}, {0, 8, 0}, {0, 8, 128}, {0, 8, 64}, {0, 9, 224}, {16, 7, 6}, {0, 8, 88}, {0, 8, 24}, {0, 9, 144}, - {19, 7, 59}, {0, 8, 120}, {0, 8, 56}, {0, 9, 208}, {17, 7, 17}, {0, 8, 104}, {0, 8, 40}, {0, 9, 176}, {0, 8, 8}, {0, 8, 136}, - {0, 8, 72}, {0, 9, 240}, {16, 7, 4}, {0, 8, 84}, {0, 8, 20}, {21, 8, 227}, {19, 7, 43}, {0, 8, 116}, {0, 8, 52}, {0, 9, 200}, - {17, 7, 13}, {0, 8, 100}, {0, 8, 36}, {0, 9, 168}, {0, 8, 4}, {0, 8, 132}, {0, 8, 68}, {0, 9, 232}, {16, 7, 8}, {0, 8, 92}, - {0, 8, 28}, {0, 9, 152}, {20, 7, 83}, {0, 8, 124}, {0, 8, 60}, {0, 9, 216}, {18, 7, 23}, {0, 8, 108}, {0, 8, 44}, {0, 9, 184}, - {0, 8, 12}, {0, 8, 140}, {0, 8, 76}, {0, 9, 248}, {16, 7, 3}, {0, 8, 82}, {0, 8, 18}, {21, 8, 163}, {19, 7, 35}, {0, 8, 114}, - {0, 8, 50}, {0, 9, 196}, {17, 7, 11}, {0, 8, 98}, {0, 8, 34}, {0, 9, 164}, {0, 8, 2}, {0, 8, 130}, {0, 8, 66}, {0, 9, 228}, - {16, 7, 7}, {0, 8, 90}, {0, 8, 26}, {0, 9, 148}, {20, 7, 67}, {0, 8, 122}, {0, 8, 58}, {0, 9, 212}, {18, 7, 19}, {0, 8, 106}, - {0, 8, 42}, {0, 9, 180}, {0, 8, 10}, {0, 8, 138}, {0, 8, 74}, {0, 9, 244}, {16, 7, 5}, {0, 8, 86}, {0, 8, 22}, {64, 8, 0}, - {19, 7, 51}, {0, 8, 118}, {0, 8, 54}, {0, 9, 204}, {17, 7, 15}, {0, 8, 102}, {0, 8, 38}, {0, 9, 172}, {0, 8, 6}, {0, 8, 134}, - {0, 8, 70}, {0, 9, 236}, {16, 7, 9}, {0, 8, 94}, {0, 8, 30}, {0, 9, 156}, {20, 7, 99}, {0, 8, 126}, {0, 8, 62}, {0, 9, 220}, - {18, 7, 27}, {0, 8, 110}, {0, 8, 46}, {0, 9, 188}, {0, 8, 14}, {0, 8, 142}, {0, 8, 78}, {0, 9, 252}, {96, 7, 0}, {0, 8, 81}, - {0, 8, 17}, {21, 8, 131}, {18, 7, 31}, {0, 8, 113}, {0, 8, 49}, {0, 9, 194}, {16, 7, 10}, {0, 8, 97}, {0, 8, 33}, {0, 9, 162}, - {0, 8, 1}, {0, 8, 129}, {0, 8, 65}, {0, 9, 226}, {16, 7, 6}, {0, 8, 89}, {0, 8, 25}, {0, 9, 146}, {19, 7, 59}, {0, 8, 121}, - {0, 8, 57}, {0, 9, 210}, {17, 7, 17}, {0, 8, 105}, {0, 8, 41}, {0, 9, 178}, {0, 8, 9}, {0, 8, 137}, {0, 8, 73}, {0, 9, 242}, - {16, 7, 4}, {0, 8, 85}, {0, 8, 21}, {16, 8, 258}, {19, 7, 43}, {0, 8, 117}, {0, 8, 53}, {0, 9, 202}, {17, 7, 13}, {0, 8, 101}, - {0, 8, 37}, {0, 9, 170}, {0, 8, 5}, {0, 8, 133}, {0, 8, 69}, {0, 9, 234}, {16, 7, 8}, {0, 8, 93}, {0, 8, 29}, {0, 9, 154}, - {20, 7, 83}, {0, 8, 125}, {0, 8, 61}, {0, 9, 218}, {18, 7, 23}, {0, 8, 109}, {0, 8, 45}, {0, 9, 186}, {0, 8, 13}, {0, 8, 141}, - {0, 8, 77}, {0, 9, 250}, {16, 7, 3}, {0, 8, 83}, {0, 8, 19}, {21, 8, 195}, {19, 7, 35}, {0, 8, 115}, {0, 8, 51}, {0, 9, 198}, - {17, 7, 11}, {0, 8, 99}, {0, 8, 35}, {0, 9, 166}, {0, 8, 3}, {0, 8, 131}, {0, 8, 67}, {0, 9, 230}, {16, 7, 7}, {0, 8, 91}, - {0, 8, 27}, {0, 9, 150}, {20, 7, 67}, {0, 8, 123}, {0, 8, 59}, {0, 9, 214}, {18, 7, 19}, {0, 8, 107}, {0, 8, 43}, {0, 9, 182}, - {0, 8, 11}, {0, 8, 139}, {0, 8, 75}, {0, 9, 246}, {16, 7, 5}, {0, 8, 87}, {0, 8, 23}, {64, 8, 0}, {19, 7, 51}, {0, 8, 119}, - {0, 8, 55}, {0, 9, 206}, {17, 7, 15}, {0, 8, 103}, {0, 8, 39}, {0, 9, 174}, {0, 8, 7}, {0, 8, 135}, {0, 8, 71}, {0, 9, 238}, - {16, 7, 9}, {0, 8, 95}, {0, 8, 31}, {0, 9, 158}, {20, 7, 99}, {0, 8, 127}, {0, 8, 63}, {0, 9, 222}, {18, 7, 27}, {0, 8, 111}, - {0, 8, 47}, {0, 9, 190}, {0, 8, 15}, {0, 8, 143}, {0, 8, 79}, {0, 9, 254}, {96, 7, 0}, {0, 8, 80}, {0, 8, 16}, {20, 8, 115}, - {18, 7, 31}, {0, 8, 112}, {0, 8, 48}, {0, 9, 193}, {16, 7, 10}, {0, 8, 96}, {0, 8, 32}, {0, 9, 161}, {0, 8, 0}, {0, 8, 128}, - {0, 8, 64}, {0, 9, 225}, {16, 7, 6}, {0, 8, 88}, {0, 8, 24}, {0, 9, 145}, 
{19, 7, 59}, {0, 8, 120}, {0, 8, 56}, {0, 9, 209}, - {17, 7, 17}, {0, 8, 104}, {0, 8, 40}, {0, 9, 177}, {0, 8, 8}, {0, 8, 136}, {0, 8, 72}, {0, 9, 241}, {16, 7, 4}, {0, 8, 84}, - {0, 8, 20}, {21, 8, 227}, {19, 7, 43}, {0, 8, 116}, {0, 8, 52}, {0, 9, 201}, {17, 7, 13}, {0, 8, 100}, {0, 8, 36}, {0, 9, 169}, - {0, 8, 4}, {0, 8, 132}, {0, 8, 68}, {0, 9, 233}, {16, 7, 8}, {0, 8, 92}, {0, 8, 28}, {0, 9, 153}, {20, 7, 83}, {0, 8, 124}, - {0, 8, 60}, {0, 9, 217}, {18, 7, 23}, {0, 8, 108}, {0, 8, 44}, {0, 9, 185}, {0, 8, 12}, {0, 8, 140}, {0, 8, 76}, {0, 9, 249}, - {16, 7, 3}, {0, 8, 82}, {0, 8, 18}, {21, 8, 163}, {19, 7, 35}, {0, 8, 114}, {0, 8, 50}, {0, 9, 197}, {17, 7, 11}, {0, 8, 98}, - {0, 8, 34}, {0, 9, 165}, {0, 8, 2}, {0, 8, 130}, {0, 8, 66}, {0, 9, 229}, {16, 7, 7}, {0, 8, 90}, {0, 8, 26}, {0, 9, 149}, - {20, 7, 67}, {0, 8, 122}, {0, 8, 58}, {0, 9, 213}, {18, 7, 19}, {0, 8, 106}, {0, 8, 42}, {0, 9, 181}, {0, 8, 10}, {0, 8, 138}, - {0, 8, 74}, {0, 9, 245}, {16, 7, 5}, {0, 8, 86}, {0, 8, 22}, {64, 8, 0}, {19, 7, 51}, {0, 8, 118}, {0, 8, 54}, {0, 9, 205}, - {17, 7, 15}, {0, 8, 102}, {0, 8, 38}, {0, 9, 173}, {0, 8, 6}, {0, 8, 134}, {0, 8, 70}, {0, 9, 237}, {16, 7, 9}, {0, 8, 94}, - {0, 8, 30}, {0, 9, 157}, {20, 7, 99}, {0, 8, 126}, {0, 8, 62}, {0, 9, 221}, {18, 7, 27}, {0, 8, 110}, {0, 8, 46}, {0, 9, 189}, - {0, 8, 14}, {0, 8, 142}, {0, 8, 78}, {0, 9, 253}, {96, 7, 0}, {0, 8, 81}, {0, 8, 17}, {21, 8, 131}, {18, 7, 31}, {0, 8, 113}, - {0, 8, 49}, {0, 9, 195}, {16, 7, 10}, {0, 8, 97}, {0, 8, 33}, {0, 9, 163}, {0, 8, 1}, {0, 8, 129}, {0, 8, 65}, {0, 9, 227}, - {16, 7, 6}, {0, 8, 89}, {0, 8, 25}, {0, 9, 147}, {19, 7, 59}, {0, 8, 121}, {0, 8, 57}, {0, 9, 211}, {17, 7, 17}, {0, 8, 105}, - {0, 8, 41}, {0, 9, 179}, {0, 8, 9}, {0, 8, 137}, {0, 8, 73}, {0, 9, 243}, {16, 7, 4}, {0, 8, 85}, {0, 8, 21}, {16, 8, 258}, - {19, 7, 43}, {0, 8, 117}, {0, 8, 53}, {0, 9, 203}, {17, 7, 13}, {0, 8, 101}, {0, 8, 37}, {0, 9, 171}, {0, 8, 5}, {0, 8, 133}, - {0, 8, 69}, {0, 9, 235}, {16, 7, 8}, {0, 8, 93}, {0, 8, 29}, {0, 9, 155}, {20, 7, 83}, {0, 8, 125}, {0, 8, 61}, {0, 9, 219}, - {18, 7, 23}, {0, 8, 109}, {0, 8, 45}, {0, 9, 187}, {0, 8, 13}, {0, 8, 141}, {0, 8, 77}, {0, 9, 251}, {16, 7, 3}, {0, 8, 83}, - {0, 8, 19}, {21, 8, 195}, {19, 7, 35}, {0, 8, 115}, {0, 8, 51}, {0, 9, 199}, {17, 7, 11}, {0, 8, 99}, {0, 8, 35}, {0, 9, 167}, - {0, 8, 3}, {0, 8, 131}, {0, 8, 67}, {0, 9, 231}, {16, 7, 7}, {0, 8, 91}, {0, 8, 27}, {0, 9, 151}, {20, 7, 67}, {0, 8, 123}, - {0, 8, 59}, {0, 9, 215}, {18, 7, 19}, {0, 8, 107}, {0, 8, 43}, {0, 9, 183}, {0, 8, 11}, {0, 8, 139}, {0, 8, 75}, {0, 9, 247}, - {16, 7, 5}, {0, 8, 87}, {0, 8, 23}, {64, 8, 0}, {19, 7, 51}, {0, 8, 119}, {0, 8, 55}, {0, 9, 207}, {17, 7, 15}, {0, 8, 103}, - {0, 8, 39}, {0, 9, 175}, {0, 8, 7}, {0, 8, 135}, {0, 8, 71}, {0, 9, 239}, {16, 7, 9}, {0, 8, 95}, {0, 8, 31}, {0, 9, 159}, - {20, 7, 99}, {0, 8, 127}, {0, 8, 63}, {0, 9, 223}, {18, 7, 27}, {0, 8, 111}, {0, 8, 47}, {0, 9, 191}, {0, 8, 15}, {0, 8, 143}, - {0, 8, 79}, {0, 9, 255}}; - -static const code distfix[32] - = {{16, 5, 1}, {23, 5, 257}, {19, 5, 17}, {27, 5, 4097}, {17, 5, 5}, {25, 5, 1025}, {21, 5, 65}, {29, 5, 16385}, - {16, 5, 3}, {24, 5, 513}, {20, 5, 33}, {28, 5, 8193}, {18, 5, 9}, {26, 5, 2049}, {22, 5, 129}, {64, 5, 0}, - {16, 5, 2}, {23, 5, 385}, {19, 5, 25}, {27, 5, 6145}, {17, 5, 7}, {25, 5, 1537}, {21, 5, 97}, {29, 5, 24577}, - {16, 5, 4}, {24, 5, 769}, {20, 5, 49}, {28, 5, 12289}, {18, 5, 13}, {26, 5, 3073}, {22, 5, 193}, {64, 5, 0}}; diff --git a/base/poco/Foundation/src/inflate.c 
b/base/poco/Foundation/src/inflate.c deleted file mode 100644 index ac333e8c2ed..00000000000 --- a/base/poco/Foundation/src/inflate.c +++ /dev/null @@ -1,1561 +0,0 @@ -/* inflate.c -- zlib decompression - * Copyright (C) 1995-2016 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* - * Change history: - * - * 1.2.beta0 24 Nov 2002 - * - First version -- complete rewrite of inflate to simplify code, avoid - * creation of window when not needed, minimize use of window when it is - * needed, make inffast.c even faster, implement gzip decoding, and to - * improve code readability and style over the previous zlib inflate code - * - * 1.2.beta1 25 Nov 2002 - * - Use pointers for available input and output checking in inffast.c - * - Remove input and output counters in inffast.c - * - Change inffast.c entry and loop from avail_in >= 7 to >= 6 - * - Remove unnecessary second byte pull from length extra in inffast.c - * - Unroll direct copy to three copies per loop in inffast.c - * - * 1.2.beta2 4 Dec 2002 - * - Change external routine names to reduce potential conflicts - * - Correct filename to inffixed.h for fixed tables in inflate.c - * - Make hbuf[] unsigned char to match parameter type in inflate.c - * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset) - * to avoid negation problem on Alphas (64 bit) in inflate.c - * - * 1.2.beta3 22 Dec 2002 - * - Add comments on state->bits assertion in inffast.c - * - Add comments on op field in inftrees.h - * - Fix bug in reuse of allocated window after inflateReset() - * - Remove bit fields--back to byte structure for speed - * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths - * - Change post-increments to pre-increments in inflate_fast(), PPC biased? - * - Add compile time option, POSTINC, to use post-increments instead (Intel?) - * - Make MATCH copy in inflate() much faster for when inflate_fast() not used - * - Use local copies of stream next and avail values, as well as local bit - * buffer and bit count in inflate()--for speed when inflate_fast() not used - * - * 1.2.beta4 1 Jan 2003 - * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings - * - Move a comment on output buffer sizes from inffast.c to inflate.c - * - Add comments in inffast.c to introduce the inflate_fast() routine - * - Rearrange window copies in inflate_fast() for speed and simplification - * - Unroll last copy for window match in inflate_fast() - * - Use local copies of window variables in inflate_fast() for speed - * - Pull out common wnext == 0 case for speed in inflate_fast() - * - Make op and len in inflate_fast() unsigned for consistency - * - Add FAR to lcode and dcode declarations in inflate_fast() - * - Simplified bad distance check in inflate_fast() - * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new - * source file infback.c to provide a call-back interface to inflate for - * programs like gzip and unzip -- uses window as output buffer to avoid - * window copying - * - * 1.2.beta5 1 Jan 2003 - * - Improved inflateBack() interface to allow the caller to provide initial - * input in strm. 
- * - Fixed stored blocks bug in inflateBack() - * - * 1.2.beta6 4 Jan 2003 - * - Added comments in inffast.c on effectiveness of POSTINC - * - Typecasting all around to reduce compiler warnings - * - Changed loops from while (1) or do {} while (1) to for (;;), again to - * make compilers happy - * - Changed type of window in inflateBackInit() to unsigned char * - * - * 1.2.beta7 27 Jan 2003 - * - Changed many types to unsigned or unsigned short to avoid warnings - * - Added inflateCopy() function - * - * 1.2.0 9 Mar 2003 - * - Changed inflateBack() interface to provide separate opaque descriptors - * for the in() and out() functions - * - Changed inflateBack() argument and in_func typedef to swap the length - * and buffer address return values for the input function - * - Check next_in and next_out for Z_NULL on entry to inflate() - * - * The history for versions after 1.2.0 are in ChangeLog in zlib distribution. - */ - -#include "zutil.h" -#include "inftrees.h" -#include "inflate.h" -#include "inffast.h" - -#ifdef MAKEFIXED -# ifndef BUILDFIXED -# define BUILDFIXED -# endif -#endif - -/* function prototypes */ -local int inflateStateCheck OF((z_streamp strm)); -local void fixedtables OF((struct inflate_state FAR *state)); -local int updatewindow OF((z_streamp strm, const unsigned char FAR *end, - unsigned copy)); -#ifdef BUILDFIXED - void makefixed OF((void)); -#endif -local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf, - unsigned len)); - -local int inflateStateCheck(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - if (strm == Z_NULL || - strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) - return 1; - state = (struct inflate_state FAR *)strm->state; - if (state == Z_NULL || state->strm != strm || - state->mode < HEAD || state->mode > SYNC) - return 1; - return 0; -} - -int ZEXPORT inflateResetKeep(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - strm->total_in = strm->total_out = state->total = 0; - strm->msg = Z_NULL; - if (state->wrap) /* to support ill-conceived Java test suite */ - strm->adler = state->wrap & 1; - state->mode = HEAD; - state->last = 0; - state->havedict = 0; - state->dmax = 32768U; - state->head = Z_NULL; - state->hold = 0; - state->bits = 0; - state->lencode = state->distcode = state->next = state->codes; - state->sane = 1; - state->back = -1; - Tracev((stderr, "inflate: reset\n")); - return Z_OK; -} - -int ZEXPORT inflateReset(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - state->wsize = 0; - state->whave = 0; - state->wnext = 0; - return inflateResetKeep(strm); -} - -int ZEXPORT inflateReset2(strm, windowBits) -z_streamp strm; -int windowBits; -{ - int wrap; - struct inflate_state FAR *state; - - /* get the state */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - - /* extract wrap request from windowBits parameter */ - if (windowBits < 0) { - wrap = 0; - windowBits = -windowBits; - } - else { - wrap = (windowBits >> 4) + 5; -#ifdef GUNZIP - if (windowBits < 48) - windowBits &= 15; -#endif - } - - /* set number of window bits, free window if different */ - if (windowBits && (windowBits < 8 || windowBits > 15)) - return Z_STREAM_ERROR; - if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) { - 
ZFREE(strm, state->window); - state->window = Z_NULL; - } - - /* update state and reset the rest of it */ - state->wrap = wrap; - state->wbits = (unsigned)windowBits; - return inflateReset(strm); -} - -int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size) -z_streamp strm; -int windowBits; -const char *version; -int stream_size; -{ - int ret; - struct inflate_state FAR *state; - - if (version == Z_NULL || version[0] != ZLIB_VERSION[0] || - stream_size != (int)(sizeof(z_stream))) - return Z_VERSION_ERROR; - if (strm == Z_NULL) return Z_STREAM_ERROR; - strm->msg = Z_NULL; /* in case we return an error */ - if (strm->zalloc == (alloc_func)0) { -#ifdef Z_SOLO - return Z_STREAM_ERROR; -#else - strm->zalloc = zcalloc; - strm->opaque = (voidpf)0; -#endif - } - if (strm->zfree == (free_func)0) -#ifdef Z_SOLO - return Z_STREAM_ERROR; -#else - strm->zfree = zcfree; -#endif - state = (struct inflate_state FAR *) - ZALLOC(strm, 1, sizeof(struct inflate_state)); - if (state == Z_NULL) return Z_MEM_ERROR; - Tracev((stderr, "inflate: allocated\n")); - strm->state = (struct internal_state FAR *)state; - state->strm = strm; - state->window = Z_NULL; - state->mode = HEAD; /* to pass state test in inflateReset2() */ - ret = inflateReset2(strm, windowBits); - if (ret != Z_OK) { - ZFREE(strm, state); - strm->state = Z_NULL; - } - return ret; -} - -int ZEXPORT inflateInit_(strm, version, stream_size) -z_streamp strm; -const char *version; -int stream_size; -{ - return inflateInit2_(strm, DEF_WBITS, version, stream_size); -} - -int ZEXPORT inflatePrime(strm, bits, value) -z_streamp strm; -int bits; -int value; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - if (bits < 0) { - state->hold = 0; - state->bits = 0; - return Z_OK; - } - if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR; - value &= (1L << bits) - 1; - state->hold += (unsigned)value << state->bits; - state->bits += (uInt)bits; - return Z_OK; -} - -/* - Return state with length and distance decoding tables and index sizes set to - fixed code decoding. Normally this returns fixed tables from inffixed.h. - If BUILDFIXED is defined, then instead this routine builds the tables the - first time it's called, and returns those tables the first time and - thereafter. This reduces the size of the code by about 2K bytes, in - exchange for a little execution time. However, BUILDFIXED should not be - used for threaded applications, since the rewriting of the tables and virgin - may not be thread-safe. 
- */
-local void fixedtables(state)
-struct inflate_state FAR *state;
-{
-#ifdef BUILDFIXED
-    static int virgin = 1;
-    static code *lenfix, *distfix;
-    static code fixed[544];
-
-    /* build fixed huffman tables if first call (may not be thread safe) */
-    if (virgin) {
-        unsigned sym, bits;
-        static code *next;
-
-        /* literal/length table */
-        sym = 0;
-        while (sym < 144) state->lens[sym++] = 8;
-        while (sym < 256) state->lens[sym++] = 9;
-        while (sym < 280) state->lens[sym++] = 7;
-        while (sym < 288) state->lens[sym++] = 8;
-        next = fixed;
-        lenfix = next;
-        bits = 9;
-        inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
-
-        /* distance table */
-        sym = 0;
-        while (sym < 32) state->lens[sym++] = 5;
-        distfix = next;
-        bits = 5;
-        inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
-
-        /* do this just once */
-        virgin = 0;
-    }
-#else /* !BUILDFIXED */
-#   include "inffixed.h"
-#endif /* BUILDFIXED */
-    state->lencode = lenfix;
-    state->lenbits = 9;
-    state->distcode = distfix;
-    state->distbits = 5;
-}
-
-#ifdef MAKEFIXED
-#include <stdio.h>
-
-/*
-   Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also
-   defines BUILDFIXED, so the tables are built on the fly. makefixed() writes
-   those tables to stdout, which would be piped to inffixed.h. A small program
-   can simply call makefixed to do this:
-
-    void makefixed(void);
-
-    int main(void)
-    {
-        makefixed();
-        return 0;
-    }
-
-   Then that can be linked with zlib built with MAKEFIXED defined and run:
-
-    a.out > inffixed.h
- */
-void makefixed()
-{
-    unsigned low, size;
-    struct inflate_state state;
-
-    fixedtables(&state);
-    puts("    /* inffixed.h -- table for decoding fixed codes");
-    puts("     * Generated automatically by makefixed().");
-    puts("     */");
-    puts("");
-    puts("    /* WARNING: this file should *not* be used by applications.");
-    puts("       It is part of the implementation of this library and is");
-    puts("       subject to change. Applications should only use zlib.h.");
-    puts("     */");
-    puts("");
-    size = 1U << 9;
-    printf("    static const code lenfix[%u] = {", size);
-    low = 0;
-    for (;;) {
-        if ((low % 7) == 0) printf("\n        ");
-        printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op,
-               state.lencode[low].bits, state.lencode[low].val);
-        if (++low == size) break;
-        putchar(',');
-    }
-    puts("\n    };");
-    size = 1U << 5;
-    printf("\n    static const code distfix[%u] = {", size);
-    low = 0;
-    for (;;) {
-        if ((low % 6) == 0) printf("\n        ");
-        printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits,
-               state.distcode[low].val);
-        if (++low == size) break;
-        putchar(',');
-    }
-    puts("\n    };");
-}
-#endif /* MAKEFIXED */
-
-/*
-   Update the window with the last wsize (normally 32K) bytes written before
-   returning. If window does not exist yet, create it. This is only called
-   when a window is already in use, or when output has been written during this
-   inflate call, but the end of the deflate stream has not been reached yet.
-   It is also called to create a window for dictionary data when a dictionary
-   is loaded.
-
-   Providing output buffers larger than 32K to inflate() should provide a speed
-   advantage, since only the last 32K of output is copied to the sliding window
-   upon return from inflate(), and since all distances after the first 32K of
-   output will fall in the output data, making match copies simpler and faster.
-   The advantage may be dependent on the size of the processor's data caches.
- */ -local int updatewindow(strm, end, copy) -z_streamp strm; -const Bytef *end; -unsigned copy; -{ - struct inflate_state FAR *state; - unsigned dist; - - state = (struct inflate_state FAR *)strm->state; - - /* if it hasn't been done already, allocate space for the window */ - if (state->window == Z_NULL) { - state->window = (unsigned char FAR *) - ZALLOC(strm, 1U << state->wbits, - sizeof(unsigned char)); - if (state->window == Z_NULL) return 1; - } - - /* if window not in use yet, initialize */ - if (state->wsize == 0) { - state->wsize = 1U << state->wbits; - state->wnext = 0; - state->whave = 0; - } - - /* copy state->wsize or less output bytes into the circular window */ - if (copy >= state->wsize) { - zmemcpy(state->window, end - state->wsize, state->wsize); - state->wnext = 0; - state->whave = state->wsize; - } - else { - dist = state->wsize - state->wnext; - if (dist > copy) dist = copy; - zmemcpy(state->window + state->wnext, end - copy, dist); - copy -= dist; - if (copy) { - zmemcpy(state->window, end - copy, copy); - state->wnext = copy; - state->whave = state->wsize; - } - else { - state->wnext += dist; - if (state->wnext == state->wsize) state->wnext = 0; - if (state->whave < state->wsize) state->whave += dist; - } - } - return 0; -} - -/* Macros for inflate(): */ - -/* check function to use adler32() for zlib or crc32() for gzip */ -#ifdef GUNZIP -# define UPDATE(check, buf, len) \ - (state->flags ? crc32(check, buf, len) : adler32(check, buf, len)) -#else -# define UPDATE(check, buf, len) adler32(check, buf, len) -#endif - -/* check macros for header crc */ -#ifdef GUNZIP -# define CRC2(check, word) \ - do { \ - hbuf[0] = (unsigned char)(word); \ - hbuf[1] = (unsigned char)((word) >> 8); \ - check = crc32(check, hbuf, 2); \ - } while (0) - -# define CRC4(check, word) \ - do { \ - hbuf[0] = (unsigned char)(word); \ - hbuf[1] = (unsigned char)((word) >> 8); \ - hbuf[2] = (unsigned char)((word) >> 16); \ - hbuf[3] = (unsigned char)((word) >> 24); \ - check = crc32(check, hbuf, 4); \ - } while (0) -#endif - -/* Load registers with state in inflate() for speed */ -#define LOAD() \ - do { \ - put = strm->next_out; \ - left = strm->avail_out; \ - next = strm->next_in; \ - have = strm->avail_in; \ - hold = state->hold; \ - bits = state->bits; \ - } while (0) - -/* Restore state from registers in inflate() */ -#define RESTORE() \ - do { \ - strm->next_out = put; \ - strm->avail_out = left; \ - strm->next_in = next; \ - strm->avail_in = have; \ - state->hold = hold; \ - state->bits = bits; \ - } while (0) - -/* Clear the input bit accumulator */ -#define INITBITS() \ - do { \ - hold = 0; \ - bits = 0; \ - } while (0) - -/* Get a byte of input into the bit accumulator, or return from inflate() - if there is no input available. */ -#define PULLBYTE() \ - do { \ - if (have == 0) goto inf_leave; \ - have--; \ - hold += (unsigned long)(*next++) << bits; \ - bits += 8; \ - } while (0) - -/* Assure that there are at least n bits in the bit accumulator. If there is - not enough available input to do that, then return from inflate(). 
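   A minimal standalone sketch (not part of zlib) of the accumulator
   discipline these macros maintain: each new byte lands above the bits
   already held, and codes are consumed from the low end:

       #include <stdio.h>

       int main(void)
       {
           const unsigned char input[] = {0x1f, 0x8b};  /* gzip magic bytes */
           unsigned long hold = 0;   /* bit buffer */
           unsigned bits = 0;        /* bits in bit buffer */
           unsigned pos = 0;

           while (bits < 16) {       /* NEEDBITS(16) via PULLBYTE() */
               hold += (unsigned long)(input[pos++]) << bits;
               bits += 8;
           }
           printf("hold = 0x%04lx\n", hold);      /* prints 0x8b1f */
           printf("BITS(4) = %lu\n", hold & 0xf); /* low bits come off first */
           hold >>= 4;                            /* DROPBITS(4) */
           bits -= 4;
           printf("left: hold = 0x%03lx, bits = %u\n", hold, bits);
           return 0;
       }

   Note that the first input byte ends up in the low bits, which is why the
   HEAD state can compare the accumulator against 0x8b1f to recognize the
   0x1f 0x8b gzip magic.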
*/ -#define NEEDBITS(n) \ - do { \ - while (bits < (unsigned)(n)) \ - PULLBYTE(); \ - } while (0) - -/* Return the low n bits of the bit accumulator (n < 16) */ -#define BITS(n) \ - ((unsigned)hold & ((1U << (n)) - 1)) - -/* Remove n bits from the bit accumulator */ -#define DROPBITS(n) \ - do { \ - hold >>= (n); \ - bits -= (unsigned)(n); \ - } while (0) - -/* Remove zero to seven bits as needed to go to a byte boundary */ -#define BYTEBITS() \ - do { \ - hold >>= bits & 7; \ - bits -= bits & 7; \ - } while (0) - -/* - inflate() uses a state machine to process as much input data and generate as - much output data as possible before returning. The state machine is - structured roughly as follows: - - for (;;) switch (state) { - ... - case STATEn: - if (not enough input data or output space to make progress) - return; - ... make progress ... - state = STATEm; - break; - ... - } - - so when inflate() is called again, the same case is attempted again, and - if the appropriate resources are provided, the machine proceeds to the - next state. The NEEDBITS() macro is usually the way the state evaluates - whether it can proceed or should return. NEEDBITS() does the return if - the requested bits are not available. The typical use of the BITS macros - is: - - NEEDBITS(n); - ... do something with BITS(n) ... - DROPBITS(n); - - where NEEDBITS(n) either returns from inflate() if there isn't enough - input left to load n bits into the accumulator, or it continues. BITS(n) - gives the low n bits in the accumulator. When done, DROPBITS(n) drops - the low n bits off the accumulator. INITBITS() clears the accumulator - and sets the number of available bits to zero. BYTEBITS() discards just - enough bits to put the accumulator on a byte boundary. After BYTEBITS() - and a NEEDBITS(8), then BITS(8) would return the next byte in the stream. - - NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return - if there is no input available. The decoding of variable length codes uses - PULLBYTE() directly in order to pull just enough bytes to decode the next - code, and no more. - - Some states loop until they get enough input, making sure that enough - state information is maintained to continue the loop where it left off - if NEEDBITS() returns in the loop. For example, want, need, and keep - would all have to actually be part of the saved state in case NEEDBITS() - returns: - - case STATEw: - while (want < need) { - NEEDBITS(n); - keep[want++] = BITS(n); - DROPBITS(n); - } - state = STATEx; - case STATEx: - - As shown above, if the next state is also the next case, then the break - is omitted. - - A state may also return if there is not enough output space available to - complete that state. Those states are copying stored data, writing a - literal byte, and copying a matching string. - - When returning, a "goto inf_leave" is used to update the total counters, - update the check value, and determine whether any progress has been made - during that inflate() call in order to return the proper return code. - Progress is defined as a change in either strm->avail_in or strm->avail_out. - When there is a window, goto inf_leave will update the window with the last - output written. If a goto inf_leave occurs in the middle of decompression - and there is no window currently, goto inf_leave will create one and copy - output to the window for the next call of inflate(). - - In this implementation, the flush parameter of inflate() only affects the - return code (per zlib.h). 
inflate() always writes as much as possible to - strm->next_out, given the space available and the provided input--the effect - documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers - the allocation of and copying into a sliding window until necessary, which - provides the effect documented in zlib.h for Z_FINISH when the entire input - stream available. So the only thing the flush parameter actually does is: - when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it - will return Z_BUF_ERROR if it has not reached the end of the stream. - */ - -int ZEXPORT inflate(strm, flush) -z_streamp strm; -int flush; -{ - struct inflate_state FAR *state; - z_const unsigned char FAR *next; /* next input */ - unsigned char FAR *put; /* next output */ - unsigned have, left; /* available input and output */ - unsigned long hold; /* bit buffer */ - unsigned bits; /* bits in bit buffer */ - unsigned in, out; /* save starting available input and output */ - unsigned copy; /* number of stored or match bytes to copy */ - unsigned char FAR *from; /* where to copy match bytes from */ - code here; /* current decoding table entry */ - code last; /* parent table entry */ - unsigned len; /* length to copy for repeats, bits to drop */ - int ret; /* return code */ -#ifdef GUNZIP - unsigned char hbuf[4]; /* buffer for gzip header crc calculation */ -#endif - static const unsigned short order[19] = /* permutation of code lengths */ - {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - - if (inflateStateCheck(strm) || strm->next_out == Z_NULL || - (strm->next_in == Z_NULL && strm->avail_in != 0)) - return Z_STREAM_ERROR; - - state = (struct inflate_state FAR *)strm->state; - if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */ - LOAD(); - in = have; - out = left; - ret = Z_OK; - for (;;) - switch (state->mode) { - case HEAD: - if (state->wrap == 0) { - state->mode = TYPEDO; - break; - } - NEEDBITS(16); -#ifdef GUNZIP - if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */ - if (state->wbits == 0) - state->wbits = 15; - state->check = crc32(0L, Z_NULL, 0); - CRC2(state->check, hold); - INITBITS(); - state->mode = FLAGS; - break; - } - state->flags = 0; /* expect zlib header */ - if (state->head != Z_NULL) - state->head->done = -1; - if (!(state->wrap & 1) || /* check if zlib header allowed */ -#else - if ( -#endif - ((BITS(8) << 8) + (hold >> 8)) % 31) { - strm->msg = (char *)"incorrect header check"; - state->mode = BAD; - break; - } - if (BITS(4) != Z_DEFLATED) { - strm->msg = (char *)"unknown compression method"; - state->mode = BAD; - break; - } - DROPBITS(4); - len = BITS(4) + 8; - if (state->wbits == 0) - state->wbits = len; - if (len > 15 || len > state->wbits) { - strm->msg = (char *)"invalid window size"; - state->mode = BAD; - break; - } - state->dmax = 1U << len; - Tracev((stderr, "inflate: zlib header ok\n")); - strm->adler = state->check = adler32(0L, Z_NULL, 0); - state->mode = hold & 0x200 ? 
DICTID : TYPE; - INITBITS(); - break; -#ifdef GUNZIP - case FLAGS: - NEEDBITS(16); - state->flags = (int)(hold); - if ((state->flags & 0xff) != Z_DEFLATED) { - strm->msg = (char *)"unknown compression method"; - state->mode = BAD; - break; - } - if (state->flags & 0xe000) { - strm->msg = (char *)"unknown header flags set"; - state->mode = BAD; - break; - } - if (state->head != Z_NULL) - state->head->text = (int)((hold >> 8) & 1); - if ((state->flags & 0x0200) && (state->wrap & 4)) - CRC2(state->check, hold); - INITBITS(); - state->mode = TIME; - case TIME: - NEEDBITS(32); - if (state->head != Z_NULL) - state->head->time = hold; - if ((state->flags & 0x0200) && (state->wrap & 4)) - CRC4(state->check, hold); - INITBITS(); - state->mode = OS; - case OS: - NEEDBITS(16); - if (state->head != Z_NULL) { - state->head->xflags = (int)(hold & 0xff); - state->head->os = (int)(hold >> 8); - } - if ((state->flags & 0x0200) && (state->wrap & 4)) - CRC2(state->check, hold); - INITBITS(); - state->mode = EXLEN; - case EXLEN: - if (state->flags & 0x0400) { - NEEDBITS(16); - state->length = (unsigned)(hold); - if (state->head != Z_NULL) - state->head->extra_len = (unsigned)hold; - if ((state->flags & 0x0200) && (state->wrap & 4)) - CRC2(state->check, hold); - INITBITS(); - } - else if (state->head != Z_NULL) - state->head->extra = Z_NULL; - state->mode = EXTRA; - case EXTRA: - if (state->flags & 0x0400) { - copy = state->length; - if (copy > have) copy = have; - if (copy) { - if (state->head != Z_NULL && - state->head->extra != Z_NULL) { - len = state->head->extra_len - state->length; - zmemcpy(state->head->extra + len, next, - len + copy > state->head->extra_max ? - state->head->extra_max - len : copy); - } - if ((state->flags & 0x0200) && (state->wrap & 4)) - state->check = crc32(state->check, next, copy); - have -= copy; - next += copy; - state->length -= copy; - } - if (state->length) goto inf_leave; - } - state->length = 0; - state->mode = NAME; - case NAME: - if (state->flags & 0x0800) { - if (have == 0) goto inf_leave; - copy = 0; - do { - len = (unsigned)(next[copy++]); - if (state->head != Z_NULL && - state->head->name != Z_NULL && - state->length < state->head->name_max) - state->head->name[state->length++] = (Bytef)len; - } while (len && copy < have); - if ((state->flags & 0x0200) && (state->wrap & 4)) - state->check = crc32(state->check, next, copy); - have -= copy; - next += copy; - if (len) goto inf_leave; - } - else if (state->head != Z_NULL) - state->head->name = Z_NULL; - state->length = 0; - state->mode = COMMENT; - case COMMENT: - if (state->flags & 0x1000) { - if (have == 0) goto inf_leave; - copy = 0; - do { - len = (unsigned)(next[copy++]); - if (state->head != Z_NULL && - state->head->comment != Z_NULL && - state->length < state->head->comm_max) - state->head->comment[state->length++] = (Bytef)len; - } while (len && copy < have); - if ((state->flags & 0x0200) && (state->wrap & 4)) - state->check = crc32(state->check, next, copy); - have -= copy; - next += copy; - if (len) goto inf_leave; - } - else if (state->head != Z_NULL) - state->head->comment = Z_NULL; - state->mode = HCRC; - case HCRC: - if (state->flags & 0x0200) { - NEEDBITS(16); - if ((state->wrap & 4) && hold != (state->check & 0xffff)) { - strm->msg = (char *)"header crc mismatch"; - state->mode = BAD; - break; - } - INITBITS(); - } - if (state->head != Z_NULL) { - state->head->hcrc = (int)((state->flags >> 9) & 1); - state->head->done = 1; - } - strm->adler = state->check = crc32(0L, Z_NULL, 0); - state->mode = TYPE; - 
break; -#endif - case DICTID: - NEEDBITS(32); - strm->adler = state->check = ZSWAP32(hold); - INITBITS(); - state->mode = DICT; - case DICT: - if (state->havedict == 0) { - RESTORE(); - return Z_NEED_DICT; - } - strm->adler = state->check = adler32(0L, Z_NULL, 0); - state->mode = TYPE; - case TYPE: - if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave; - case TYPEDO: - if (state->last) { - BYTEBITS(); - state->mode = CHECK; - break; - } - NEEDBITS(3); - state->last = BITS(1); - DROPBITS(1); - switch (BITS(2)) { - case 0: /* stored block */ - Tracev((stderr, "inflate: stored block%s\n", - state->last ? " (last)" : "")); - state->mode = STORED; - break; - case 1: /* fixed block */ - fixedtables(state); - Tracev((stderr, "inflate: fixed codes block%s\n", - state->last ? " (last)" : "")); - state->mode = LEN_; /* decode codes */ - if (flush == Z_TREES) { - DROPBITS(2); - goto inf_leave; - } - break; - case 2: /* dynamic block */ - Tracev((stderr, "inflate: dynamic codes block%s\n", - state->last ? " (last)" : "")); - state->mode = TABLE; - break; - case 3: - strm->msg = (char *)"invalid block type"; - state->mode = BAD; - } - DROPBITS(2); - break; - case STORED: - BYTEBITS(); /* go to byte boundary */ - NEEDBITS(32); - if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) { - strm->msg = (char *)"invalid stored block lengths"; - state->mode = BAD; - break; - } - state->length = (unsigned)hold & 0xffff; - Tracev((stderr, "inflate: stored length %u\n", - state->length)); - INITBITS(); - state->mode = COPY_; - if (flush == Z_TREES) goto inf_leave; - case COPY_: - state->mode = COPY; - case COPY: - copy = state->length; - if (copy) { - if (copy > have) copy = have; - if (copy > left) copy = left; - if (copy == 0) goto inf_leave; - zmemcpy(put, next, copy); - have -= copy; - next += copy; - left -= copy; - put += copy; - state->length -= copy; - break; - } - Tracev((stderr, "inflate: stored end\n")); - state->mode = TYPE; - break; - case TABLE: - NEEDBITS(14); - state->nlen = BITS(5) + 257; - DROPBITS(5); - state->ndist = BITS(5) + 1; - DROPBITS(5); - state->ncode = BITS(4) + 4; - DROPBITS(4); -#ifndef PKZIP_BUG_WORKAROUND - if (state->nlen > 286 || state->ndist > 30) { - strm->msg = (char *)"too many length or distance symbols"; - state->mode = BAD; - break; - } -#endif - Tracev((stderr, "inflate: table sizes ok\n")); - state->have = 0; - state->mode = LENLENS; - case LENLENS: - while (state->have < state->ncode) { - NEEDBITS(3); - state->lens[order[state->have++]] = (unsigned short)BITS(3); - DROPBITS(3); - } - while (state->have < 19) - state->lens[order[state->have++]] = 0; - state->next = state->codes; - state->lencode = (const code FAR *)(state->next); - state->lenbits = 7; - ret = inflate_table(CODES, state->lens, 19, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid code lengths set"; - state->mode = BAD; - break; - } - Tracev((stderr, "inflate: code lengths ok\n")); - state->have = 0; - state->mode = CODELENS; - case CODELENS: - while (state->have < state->nlen + state->ndist) { - for (;;) { - here = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if (here.val < 16) { - DROPBITS(here.bits); - state->lens[state->have++] = here.val; - } - else { - if (here.val == 16) { - NEEDBITS(here.bits + 2); - DROPBITS(here.bits); - if (state->have == 0) { - strm->msg = (char *)"invalid bit length repeat"; - state->mode = BAD; - break; - } - len = state->lens[state->have - 1]; - copy = 3 + BITS(2); - 
DROPBITS(2); - } - else if (here.val == 17) { - NEEDBITS(here.bits + 3); - DROPBITS(here.bits); - len = 0; - copy = 3 + BITS(3); - DROPBITS(3); - } - else { - NEEDBITS(here.bits + 7); - DROPBITS(here.bits); - len = 0; - copy = 11 + BITS(7); - DROPBITS(7); - } - if (state->have + copy > state->nlen + state->ndist) { - strm->msg = (char *)"invalid bit length repeat"; - state->mode = BAD; - break; - } - while (copy--) - state->lens[state->have++] = (unsigned short)len; - } - } - - /* handle error breaks in while */ - if (state->mode == BAD) break; - - /* check for end-of-block code (better have one) */ - if (state->lens[256] == 0) { - strm->msg = (char *)"invalid code -- missing end-of-block"; - state->mode = BAD; - break; - } - - /* build code tables -- note: do not change the lenbits or distbits - values here (9 and 6) without reading the comments in inftrees.h - concerning the ENOUGH constants, which depend on those values */ - state->next = state->codes; - state->lencode = (const code FAR *)(state->next); - state->lenbits = 9; - ret = inflate_table(LENS, state->lens, state->nlen, &(state->next), - &(state->lenbits), state->work); - if (ret) { - strm->msg = (char *)"invalid literal/lengths set"; - state->mode = BAD; - break; - } - state->distcode = (const code FAR *)(state->next); - state->distbits = 6; - ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist, - &(state->next), &(state->distbits), state->work); - if (ret) { - strm->msg = (char *)"invalid distances set"; - state->mode = BAD; - break; - } - Tracev((stderr, "inflate: codes ok\n")); - state->mode = LEN_; - if (flush == Z_TREES) goto inf_leave; - case LEN_: - state->mode = LEN; - case LEN: - if (have >= 6 && left >= 258) { - RESTORE(); - inflate_fast(strm, out); - LOAD(); - if (state->mode == TYPE) - state->back = -1; - break; - } - state->back = 0; - for (;;) { - here = state->lencode[BITS(state->lenbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if (here.op && (here.op & 0xf0) == 0) { - last = here; - for (;;) { - here = state->lencode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + here.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - state->back += last.bits; - } - DROPBITS(here.bits); - state->back += here.bits; - state->length = (unsigned)here.val; - if ((int)(here.op) == 0) { - Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ? 
- "inflate: literal '%c'\n" : - "inflate: literal 0x%02x\n", here.val)); - state->mode = LIT; - break; - } - if (here.op & 32) { - Tracevv((stderr, "inflate: end of block\n")); - state->back = -1; - state->mode = TYPE; - break; - } - if (here.op & 64) { - strm->msg = (char *)"invalid literal/length code"; - state->mode = BAD; - break; - } - state->extra = (unsigned)(here.op) & 15; - state->mode = LENEXT; - case LENEXT: - if (state->extra) { - NEEDBITS(state->extra); - state->length += BITS(state->extra); - DROPBITS(state->extra); - state->back += state->extra; - } - Tracevv((stderr, "inflate: length %u\n", state->length)); - state->was = state->length; - state->mode = DIST; - case DIST: - for (;;) { - here = state->distcode[BITS(state->distbits)]; - if ((unsigned)(here.bits) <= bits) break; - PULLBYTE(); - } - if ((here.op & 0xf0) == 0) { - last = here; - for (;;) { - here = state->distcode[last.val + - (BITS(last.bits + last.op) >> last.bits)]; - if ((unsigned)(last.bits + here.bits) <= bits) break; - PULLBYTE(); - } - DROPBITS(last.bits); - state->back += last.bits; - } - DROPBITS(here.bits); - state->back += here.bits; - if (here.op & 64) { - strm->msg = (char *)"invalid distance code"; - state->mode = BAD; - break; - } - state->offset = (unsigned)here.val; - state->extra = (unsigned)(here.op) & 15; - state->mode = DISTEXT; - case DISTEXT: - if (state->extra) { - NEEDBITS(state->extra); - state->offset += BITS(state->extra); - DROPBITS(state->extra); - state->back += state->extra; - } -#ifdef INFLATE_STRICT - if (state->offset > state->dmax) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#endif - Tracevv((stderr, "inflate: distance %u\n", state->offset)); - state->mode = MATCH; - case MATCH: - if (left == 0) goto inf_leave; - copy = out - left; - if (state->offset > copy) { /* copy from window */ - copy = state->offset - copy; - if (copy > state->whave) { - if (state->sane) { - strm->msg = (char *)"invalid distance too far back"; - state->mode = BAD; - break; - } -#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR - Trace((stderr, "inflate.c too far\n")); - copy -= state->whave; - if (copy > state->length) copy = state->length; - if (copy > left) copy = left; - left -= copy; - state->length -= copy; - do { - *put++ = 0; - } while (--copy); - if (state->length == 0) state->mode = LEN; - break; -#endif - } - if (copy > state->wnext) { - copy -= state->wnext; - from = state->window + (state->wsize - copy); - } - else - from = state->window + (state->wnext - copy); - if (copy > state->length) copy = state->length; - } - else { /* copy from output */ - from = put - state->offset; - copy = state->length; - } - if (copy > left) copy = left; - left -= copy; - state->length -= copy; - do { - *put++ = *from++; - } while (--copy); - if (state->length == 0) state->mode = LEN; - break; - case LIT: - if (left == 0) goto inf_leave; - *put++ = (unsigned char)(state->length); - left--; - state->mode = LEN; - break; - case CHECK: - if (state->wrap) { - NEEDBITS(32); - out -= left; - strm->total_out += out; - state->total += out; - if ((state->wrap & 4) && out) - strm->adler = state->check = - UPDATE(state->check, put - out, out); - out = left; - if ((state->wrap & 4) && ( -#ifdef GUNZIP - state->flags ? 
hold : -#endif - ZSWAP32(hold)) != state->check) { - strm->msg = (char *)"incorrect data check"; - state->mode = BAD; - break; - } - INITBITS(); - Tracev((stderr, "inflate: check matches trailer\n")); - } -#ifdef GUNZIP - state->mode = LENGTH; - case LENGTH: - if (state->wrap && state->flags) { - NEEDBITS(32); - if (hold != (state->total & 0xffffffffUL)) { - strm->msg = (char *)"incorrect length check"; - state->mode = BAD; - break; - } - INITBITS(); - Tracev((stderr, "inflate: length matches trailer\n")); - } -#endif - state->mode = DONE; - case DONE: - ret = Z_STREAM_END; - goto inf_leave; - case BAD: - ret = Z_DATA_ERROR; - goto inf_leave; - case MEM: - return Z_MEM_ERROR; - case SYNC: - default: - return Z_STREAM_ERROR; - } - - /* - Return from inflate(), updating the total counts and the check value. - If there was no progress during the inflate() call, return a buffer - error. Call updatewindow() to create and/or update the window state. - Note: a memory error from inflate() is non-recoverable. - */ - inf_leave: - RESTORE(); - if (state->wsize || (out != strm->avail_out && state->mode < BAD && - (state->mode < CHECK || flush != Z_FINISH))) - if (updatewindow(strm, strm->next_out, out - strm->avail_out)) { - state->mode = MEM; - return Z_MEM_ERROR; - } - in -= strm->avail_in; - out -= strm->avail_out; - strm->total_in += in; - strm->total_out += out; - state->total += out; - if ((state->wrap & 4) && out) - strm->adler = state->check = - UPDATE(state->check, strm->next_out - out, out); - strm->data_type = (int)state->bits + (state->last ? 64 : 0) + - (state->mode == TYPE ? 128 : 0) + - (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0); - if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK) - ret = Z_BUF_ERROR; - return ret; -} - -int ZEXPORT inflateEnd(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - if (inflateStateCheck(strm)) - return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - if (state->window != Z_NULL) ZFREE(strm, state->window); - ZFREE(strm, strm->state); - strm->state = Z_NULL; - Tracev((stderr, "inflate: end\n")); - return Z_OK; -} - -int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength) -z_streamp strm; -Bytef *dictionary; -uInt *dictLength; -{ - struct inflate_state FAR *state; - - /* check state */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - - /* copy dictionary */ - if (state->whave && dictionary != Z_NULL) { - zmemcpy(dictionary, state->window + state->wnext, - state->whave - state->wnext); - zmemcpy(dictionary + state->whave - state->wnext, - state->window, state->wnext); - } - if (dictLength != Z_NULL) - *dictLength = state->whave; - return Z_OK; -} - -int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength) -z_streamp strm; -const Bytef *dictionary; -uInt dictLength; -{ - struct inflate_state FAR *state; - unsigned long dictid; - int ret; - - /* check state */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - if (state->wrap != 0 && state->mode != DICT) - return Z_STREAM_ERROR; - - /* check for correct dictionary identifier */ - if (state->mode == DICT) { - dictid = adler32(0L, Z_NULL, 0); - dictid = adler32(dictid, dictionary, dictLength); - if (dictid != state->check) - return Z_DATA_ERROR; - } - - /* copy dictionary to window using updatewindow(), which will amend the - existing dictionary if appropriate */ - ret = updatewindow(strm, dictionary + dictLength, dictLength); - if 
(ret) { - state->mode = MEM; - return Z_MEM_ERROR; - } - state->havedict = 1; - Tracev((stderr, "inflate: dictionary set\n")); - return Z_OK; -} - -int ZEXPORT inflateGetHeader(strm, head) -z_streamp strm; -gz_headerp head; -{ - struct inflate_state FAR *state; - - /* check state */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - if ((state->wrap & 2) == 0) return Z_STREAM_ERROR; - - /* save header structure */ - state->head = head; - head->done = 0; - return Z_OK; -} - -/* - Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found - or when out of input. When called, *have is the number of pattern bytes - found in order so far, in 0..3. On return *have is updated to the new - state. If on return *have equals four, then the pattern was found and the - return value is how many bytes were read including the last byte of the - pattern. If *have is less than four, then the pattern has not been found - yet and the return value is len. In the latter case, syncsearch() can be - called again with more data and the *have state. *have is initialized to - zero for the first call. - */ -local unsigned syncsearch(have, buf, len) -unsigned FAR *have; -const unsigned char FAR *buf; -unsigned len; -{ - unsigned got; - unsigned next; - - got = *have; - next = 0; - while (next < len && got < 4) { - if ((int)(buf[next]) == (got < 2 ? 0 : 0xff)) - got++; - else if (buf[next]) - got = 0; - else - got = 4 - got; - next++; - } - *have = got; - return next; -} - -int ZEXPORT inflateSync(strm) -z_streamp strm; -{ - unsigned len; /* number of bytes to look at or looked at */ - unsigned long in, out; /* temporary to save total_in and total_out */ - unsigned char buf[4]; /* to restore bit buffer to byte string */ - struct inflate_state FAR *state; - - /* check parameters */ - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR; - - /* if first time, start search in bit buffer */ - if (state->mode != SYNC) { - state->mode = SYNC; - state->hold <<= state->bits & 7; - state->bits -= state->bits & 7; - len = 0; - while (state->bits >= 8) { - buf[len++] = (unsigned char)(state->hold); - state->hold >>= 8; - state->bits -= 8; - } - state->have = 0; - syncsearch(&(state->have), buf, len); - } - - /* search available input */ - len = syncsearch(&(state->have), strm->next_in, strm->avail_in); - strm->avail_in -= len; - strm->next_in += len; - strm->total_in += len; - - /* return no joy or set up to restart inflate() on a new block */ - if (state->have != 4) return Z_DATA_ERROR; - in = strm->total_in; out = strm->total_out; - inflateReset(strm); - strm->total_in = in; strm->total_out = out; - state->mode = TYPE; - return Z_OK; -} - -/* - Returns true if inflate is currently at the end of a block generated by - Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP - implementation to provide an additional safety check. PPP uses - Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored - block. When decompressing, PPP checks that at the end of input packet, - inflate is waiting for these length bytes. 
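   A minimal standalone sketch (not part of zlib; names are invented): the
   four marker bytes are LEN = 0x0000 and its complement 0xffff from that
   empty stored block, and the scan that finds them is the same algorithm as
   syncsearch() above:

       #include <stdio.h>

       static unsigned find_sync(unsigned *have, const unsigned char *buf,
                                 unsigned len)
       {
           unsigned got = *have, next = 0;
           while (next < len && got < 4) {
               if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
                   got++;               /* byte extends 00 00 ff ff */
               else if (buf[next])
                   got = 0;             /* nonzero byte: start over */
               else
                   got = 4 - got;       /* extra zero may start a new match */
               next++;
           }
           *have = got;
           return next;
       }

       int main(void)
       {
           const unsigned char data[] =
               {0x42, 0x00, 0x00, 0x00, 0xff, 0xff, 0x99};
           unsigned have = 0;
           unsigned used = find_sync(&have, data, sizeof(data));
           if (have == 4)
               printf("marker ends after %u bytes\n", used);  /* 6 */
           return 0;
       }

   The *have counter is what lets inflateSync() resume the search across
   buffer boundaries.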
- */ -int ZEXPORT inflateSyncPoint(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - return state->mode == STORED && state->bits == 0; -} - -int ZEXPORT inflateCopy(dest, source) -z_streamp dest; -z_streamp source; -{ - struct inflate_state FAR *state; - struct inflate_state FAR *copy; - unsigned char FAR *window; - unsigned wsize; - - /* check input */ - if (inflateStateCheck(source) || dest == Z_NULL) - return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)source->state; - - /* allocate space */ - copy = (struct inflate_state FAR *) - ZALLOC(source, 1, sizeof(struct inflate_state)); - if (copy == Z_NULL) return Z_MEM_ERROR; - window = Z_NULL; - if (state->window != Z_NULL) { - window = (unsigned char FAR *) - ZALLOC(source, 1U << state->wbits, sizeof(unsigned char)); - if (window == Z_NULL) { - ZFREE(source, copy); - return Z_MEM_ERROR; - } - } - - /* copy state */ - zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream)); - zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state)); - copy->strm = dest; - if (state->lencode >= state->codes && - state->lencode <= state->codes + ENOUGH - 1) { - copy->lencode = copy->codes + (state->lencode - state->codes); - copy->distcode = copy->codes + (state->distcode - state->codes); - } - copy->next = copy->codes + (state->next - state->codes); - if (window != Z_NULL) { - wsize = 1U << state->wbits; - zmemcpy(window, state->window, wsize); - } - copy->window = window; - dest->state = (struct internal_state FAR *)copy; - return Z_OK; -} - -int ZEXPORT inflateUndermine(strm, subvert) -z_streamp strm; -int subvert; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; -#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR - state->sane = !subvert; - return Z_OK; -#else - (void)subvert; - state->sane = 1; - return Z_DATA_ERROR; -#endif -} - -int ZEXPORT inflateValidate(strm, check) -z_streamp strm; -int check; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) return Z_STREAM_ERROR; - state = (struct inflate_state FAR *)strm->state; - if (check) - state->wrap |= 4; - else - state->wrap &= ~4; - return Z_OK; -} - -long ZEXPORT inflateMark(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - - if (inflateStateCheck(strm)) - return -(1L << 16); - state = (struct inflate_state FAR *)strm->state; - return (long)(((unsigned long)((long)state->back)) << 16) + - (state->mode == COPY ? state->length : - (state->mode == MATCH ? state->was - state->length : 0)); -} - -unsigned long ZEXPORT inflateCodesUsed(strm) -z_streamp strm; -{ - struct inflate_state FAR *state; - if (inflateStateCheck(strm)) return (unsigned long)-1; - state = (struct inflate_state FAR *)strm->state; - return (unsigned long)(state->next - state->codes); -} diff --git a/base/poco/Foundation/src/inflate.h b/base/poco/Foundation/src/inflate.h deleted file mode 100644 index 6096e40fb35..00000000000 --- a/base/poco/Foundation/src/inflate.h +++ /dev/null @@ -1,127 +0,0 @@ -/* inflate.h -- internal inflate state definition - * Copyright (C) 1995-2016 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. 
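   A short usage sketch (assuming zlib is linked) of doing exactly that: the
   checkpointing implemented by inflateCopy() above is reached purely through
   zlib.h:

       #include <stdio.h>
       #include <string.h>
       #include <zlib.h>

       int main(void)
       {
           z_stream strm, snap;

           memset(&strm, 0, sizeof(strm));   /* Z_NULL zalloc -> defaults */
           if (inflateInit(&strm) != Z_OK) return 1;

           /* ... calls to inflate(&strm, Z_NO_FLUSH) would go here ... */

           memset(&snap, 0, sizeof(snap));
           if (inflateCopy(&snap, &strm) != Z_OK) {  /* state + window copied */
               inflateEnd(&strm);
               return 1;
           }

           /* the two streams are now independent */
           inflateEnd(&snap);
           inflateEnd(&strm);
           puts("checkpoint taken and released");
           return 0;
       }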
- */ - -/* define NO_GZIP when compiling if you want to disable gzip header and - trailer decoding by inflate(). NO_GZIP would be used to avoid linking in - the crc code when it is not needed. For shared libraries, gzip decoding - should be left enabled. */ -#ifndef NO_GZIP -# define GUNZIP -#endif - -/* Possible inflate modes between inflate() calls */ -typedef enum -{ - HEAD = 16180, /* i: waiting for magic header */ - FLAGS, /* i: waiting for method and flags (gzip) */ - TIME, /* i: waiting for modification time (gzip) */ - OS, /* i: waiting for extra flags and operating system (gzip) */ - EXLEN, /* i: waiting for extra length (gzip) */ - EXTRA, /* i: waiting for extra bytes (gzip) */ - NAME, /* i: waiting for end of file name (gzip) */ - COMMENT, /* i: waiting for end of comment (gzip) */ - HCRC, /* i: waiting for header crc (gzip) */ - DICTID, /* i: waiting for dictionary check value */ - DICT, /* waiting for inflateSetDictionary() call */ - TYPE, /* i: waiting for type bits, including last-flag bit */ - TYPEDO, /* i: same, but skip check to exit inflate on new block */ - STORED, /* i: waiting for stored size (length and complement) */ - COPY_, /* i/o: same as COPY below, but only first time in */ - COPY, /* i/o: waiting for input or output to copy stored block */ - TABLE, /* i: waiting for dynamic block table lengths */ - LENLENS, /* i: waiting for code length code lengths */ - CODELENS, /* i: waiting for length/lit and distance code lengths */ - LEN_, /* i: same as LEN below, but only first time in */ - LEN, /* i: waiting for length/lit/eob code */ - LENEXT, /* i: waiting for length extra bits */ - DIST, /* i: waiting for distance code */ - DISTEXT, /* i: waiting for distance extra bits */ - MATCH, /* o: waiting for output space to copy string */ - LIT, /* o: waiting for output space to write literal */ - CHECK, /* i: waiting for 32-bit check value */ - LENGTH, /* i: waiting for 32-bit length (gzip) */ - DONE, /* finished check, done -- remain here until reset */ - BAD, /* got a data error -- remain here until reset */ - MEM, /* got an inflate() memory error -- remain here until reset */ - SYNC /* looking for synchronization bytes to restart inflate() */ -} inflate_mode; - -/* - State transitions between above modes - - - (most modes can go to BAD or MEM on error -- not shown for clarity) - - Process header: - HEAD -> (gzip) or (zlib) or (raw) - (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> - HCRC -> TYPE - (zlib) -> DICTID or TYPE - DICTID -> DICT -> TYPE - (raw) -> TYPEDO - Read deflate blocks: - TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK - STORED -> COPY_ -> COPY -> TYPE - TABLE -> LENLENS -> CODELENS -> LEN_ - LEN_ -> LEN - Read deflate codes in fixed or dynamic block: - LEN -> LENEXT or LIT or TYPE - LENEXT -> DIST -> DISTEXT -> MATCH -> LEN - LIT -> LEN - Process trailer: - CHECK -> LENGTH -> DONE - */ - -/* State maintained between inflate() calls -- approximately 7K bytes, not - including the allocated sliding window, which is up to 32K bytes. 
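   A minimal standalone sketch (not part of zlib; names are invented) of the
   resumable-switch idiom these modes support: the saved mode lets a call
   return on empty input, and a later call continues exactly where it
   stopped:

       #include <stdio.h>

       enum mode { WANT_A, WANT_B, FINISHED };

       struct machine { enum mode mode; };    /* persists between calls */

       /* consume input until blocked or finished; return bytes used */
       static unsigned step(struct machine *m, const char *in, unsigned len)
       {
           unsigned pos = 0;
           for (;;)
               switch (m->mode) {
               case WANT_A:
                   if (pos == len) return pos;        /* out of input */
                   if (in[pos++] == 'a') m->mode = WANT_B;
                   break;
               case WANT_B: {
                   char c;
                   if (pos == len) return pos;
                   c = in[pos++];
                   m->mode = (c == 'b') ? FINISHED
                           : (c == 'a') ? WANT_B : WANT_A;
                   break;
               }
               default:                               /* FINISHED */
                   return pos;                        /* stay until reset */
               }
       }

       int main(void)
       {
           struct machine m = {WANT_A};
           step(&m, "xxa", 3);    /* stops mid-pattern, mode saved */
           step(&m, "b", 1);      /* resumes and completes it */
           printf("found \"ab\": %s\n", m.mode == FINISHED ? "yes" : "no");
           return 0;
       }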
*/ -struct inflate_state -{ - z_streamp strm; /* pointer back to this zlib stream */ - inflate_mode mode; /* current inflate mode */ - int last; /* true if processing last block */ - int wrap; /* bit 0 true for zlib, bit 1 true for gzip, - bit 2 true to validate check value */ - int havedict; /* true if dictionary provided */ - int flags; /* gzip header method and flags (0 if zlib) */ - unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ - unsigned long check; /* protected copy of check value */ - unsigned long total; /* protected copy of output count */ - gz_headerp head; /* where to save gzip header information */ - /* sliding window */ - unsigned wbits; /* log base 2 of requested window size */ - unsigned wsize; /* window size or zero if not using window */ - unsigned whave; /* valid bytes in the window */ - unsigned wnext; /* window write index */ - unsigned char FAR * window; /* allocated sliding window, if needed */ - /* bit accumulator */ - unsigned long hold; /* input bit accumulator */ - unsigned bits; /* number of bits in "in" */ - /* for string and stored block copying */ - unsigned length; /* literal or length of data to copy */ - unsigned offset; /* distance back to copy string from */ - /* for table and code decoding */ - unsigned extra; /* extra bits needed */ - /* fixed and dynamic code tables */ - code const FAR * lencode; /* starting table for length/literal codes */ - code const FAR * distcode; /* starting table for distance codes */ - unsigned lenbits; /* index bits for lencode */ - unsigned distbits; /* index bits for distcode */ - /* dynamic table building */ - unsigned ncode; /* number of code length code lengths */ - unsigned nlen; /* number of length code lengths */ - unsigned ndist; /* number of distance code lengths */ - unsigned have; /* number of code lengths in lens[] */ - code FAR * next; /* next available space in codes[] */ - unsigned short lens[320]; /* temporary storage for code lengths */ - unsigned short work[288]; /* work area for code table building */ - code codes[ENOUGH]; /* space for code tables */ - int sane; /* if false, allow invalid distance too far */ - int back; /* bits back of last unprocessed length/lit */ - unsigned was; /* initial length of match */ -}; diff --git a/base/poco/Foundation/src/inftrees.c b/base/poco/Foundation/src/inftrees.c deleted file mode 100644 index 2ea08fc13ea..00000000000 --- a/base/poco/Foundation/src/inftrees.c +++ /dev/null @@ -1,304 +0,0 @@ -/* inftrees.c -- generate Huffman trees for efficient decoding - * Copyright (C) 1995-2017 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -#include "zutil.h" -#include "inftrees.h" - -#define MAXBITS 15 - -const char inflate_copyright[] = - " inflate 1.2.11 Copyright 1995-2017 Mark Adler "; -/* - If you use the zlib library in a product, an acknowledgment is welcome - in the documentation of your product. If for some reason you cannot - include such an acknowledgment, I would appreciate that you keep this - copyright string in the executable of your product. - */ - -/* - Build a set of tables to decode the provided canonical Huffman code. - The code lengths are lens[0..codes-1]. The result starts at *table, - whose indices are 0..2^bits-1. work is a writable array of at least - lens shorts, which is used as a work area. type is the type of code - to be generated, CODES, LENS, or DISTS. On return, zero is success, - -1 is an invalid code, and +1 means that ENOUGH isn't enough. 
table - on return points to the next available entry's address. bits is the - requested root table index bits, and on return it is the actual root - table index bits. It will differ if the request is greater than the - longest code or if it is less than the shortest code. - */ -int ZLIB_INTERNAL inflate_table(type, lens, codes, table, bits, work) -codetype type; -unsigned short FAR *lens; -unsigned codes; -code FAR * FAR *table; -unsigned FAR *bits; -unsigned short FAR *work; -{ - unsigned len; /* a code's length in bits */ - unsigned sym; /* index of code symbols */ - unsigned min, max; /* minimum and maximum code lengths */ - unsigned root; /* number of index bits for root table */ - unsigned curr; /* number of index bits for current table */ - unsigned drop; /* code bits to drop for sub-table */ - int left; /* number of prefix codes available */ - unsigned used; /* code entries in table used */ - unsigned huff; /* Huffman code */ - unsigned incr; /* for incrementing code, index */ - unsigned fill; /* index for replicating entries */ - unsigned low; /* low bits for current root entry */ - unsigned mask; /* mask for low root bits */ - code here; /* table entry for duplication */ - code FAR *next; /* next available space in table */ - const unsigned short FAR *base; /* base value table to use */ - const unsigned short FAR *extra; /* extra bits table to use */ - unsigned match; /* use base and extra for symbol >= match */ - unsigned short count[MAXBITS+1]; /* number of codes of each length */ - unsigned short offs[MAXBITS+1]; /* offsets in table for each length */ - static const unsigned short lbase[31] = { /* Length codes 257..285 base */ - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, - 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0}; - static const unsigned short lext[31] = { /* Length codes 257..285 extra */ - 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18, - 19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202}; - static const unsigned short dbase[32] = { /* Distance codes 0..29 base */ - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, - 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, - 8193, 12289, 16385, 24577, 0, 0}; - static const unsigned short dext[32] = { /* Distance codes 0..29 extra */ - 16, 16, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, - 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, - 28, 28, 29, 29, 64, 64}; - - /* - Process a set of code lengths to create a canonical Huffman code. The - code lengths are lens[0..codes-1]. Each length corresponds to the - symbols 0..codes-1. The Huffman code is generated by first sorting the - symbols by length from short to long, and retaining the symbol order - for codes with equal lengths. Then the code starts with all zero bits - for the first code of the shortest length, and the codes are integer - increments for the same length, and zeros are appended as the length - increases. For the deflate format, these bits are stored backwards - from their more natural integer increment ordering, and so when the - decoding tables are built in the large loop below, the integer codes - are incremented backwards. - - This routine assumes, but does not check, that all of the entries in - lens[] are in the range 0..MAXBITS. The caller must assure this. - 1..MAXBITS is interpreted as that code length. zero means that that - symbol does not occur in this code. 
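   A minimal standalone sketch (not part of zlib) of the construction just
   described, in its usual counting form (as in the deflate specification)
   and without the bit reversal:

       #include <stdio.h>

       #define MAXBITS 15

       int main(void)
       {
           /* lengths for symbols 0..3; a zero would mean "symbol unused" */
           static const unsigned lens[4] = {2, 1, 3, 3};
           unsigned count[MAXBITS + 1] = {0};
           unsigned next_code[MAXBITS + 1] = {0};
           unsigned len, sym, code = 0;

           for (sym = 0; sym < 4; sym++)
               count[lens[sym]]++;                  /* codes per length */
           for (len = 1; len <= MAXBITS; len++) {
               code = (code + count[len - 1]) << 1; /* first code of length */
               next_code[len] = code;
           }
           for (sym = 0; sym < 4; sym++)            /* symbol order kept */
               if (lens[sym] != 0)
                   printf("symbol %u: %u bits, code %u\n",
                          sym, lens[sym], next_code[lens[sym]]++);
           return 0;
       }

   This assigns codes 10, 0, 110 and 111 (in binary) to symbols 0..3, a
   complete prefix code; deflate then transmits those bit patterns reversed.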
- - The codes are sorted by computing a count of codes for each length, - creating from that a table of starting indices for each length in the - sorted table, and then entering the symbols in order in the sorted - table. The sorted table is work[], with that space being provided by - the caller. - - The length counts are used for other purposes as well, i.e. finding - the minimum and maximum length codes, determining if there are any - codes at all, checking for a valid set of lengths, and looking ahead - at length counts to determine sub-table sizes when building the - decoding tables. - */ - - /* accumulate lengths for codes (assumes lens[] all in 0..MAXBITS) */ - for (len = 0; len <= MAXBITS; len++) - count[len] = 0; - for (sym = 0; sym < codes; sym++) - count[lens[sym]]++; - - /* bound code lengths, force root to be within code lengths */ - root = *bits; - for (max = MAXBITS; max >= 1; max--) - if (count[max] != 0) break; - if (root > max) root = max; - if (max == 0) { /* no symbols to code at all */ - here.op = (unsigned char)64; /* invalid code marker */ - here.bits = (unsigned char)1; - here.val = (unsigned short)0; - *(*table)++ = here; /* make a table to force an error */ - *(*table)++ = here; - *bits = 1; - return 0; /* no symbols, but wait for decoding to report error */ - } - for (min = 1; min < max; min++) - if (count[min] != 0) break; - if (root < min) root = min; - - /* check for an over-subscribed or incomplete set of lengths */ - left = 1; - for (len = 1; len <= MAXBITS; len++) { - left <<= 1; - left -= count[len]; - if (left < 0) return -1; /* over-subscribed */ - } - if (left > 0 && (type == CODES || max != 1)) - return -1; /* incomplete set */ - - /* generate offsets into symbol table for each length for sorting */ - offs[1] = 0; - for (len = 1; len < MAXBITS; len++) - offs[len + 1] = offs[len] + count[len]; - - /* sort symbols by length, by symbol order within each length */ - for (sym = 0; sym < codes; sym++) - if (lens[sym] != 0) work[offs[lens[sym]]++] = (unsigned short)sym; - - /* - Create and fill in decoding tables. In this loop, the table being - filled is at next and has curr index bits. The code being used is huff - with length len. That code is converted to an index by dropping drop - bits off of the bottom. For codes where len is less than drop + curr, - those top drop + curr - len bits are incremented through all values to - fill the table with replicated entries. - - root is the number of index bits for the root table. When len exceeds - root, sub-tables are created pointed to by the root entry with an index - of the low root bits of huff. This is saved in low to check for when a - new sub-table should be started. drop is zero when the root table is - being filled, and drop is root when sub-tables are being filled. - - When a new sub-table is needed, it is necessary to look ahead in the - code lengths to determine what size sub-table is needed. The length - counts are used for this, and so count[] is decremented as codes are - entered in the tables. - - used keeps track of how many table entries have been allocated from the - provided *table space. It is checked for LENS and DIST tables against - the constants ENOUGH_LENS and ENOUGH_DISTS to guard against changes in - the initial root table size constants. See the comments in inftrees.h - for more information. - - sym increments through all symbols, and the loop terminates when - all codes of length max, i.e. all codes, have been processed. 
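   The over-subscription test earlier in this routine is the Kraft
   inequality in integer form; isolated as a standalone sketch (not part of
   zlib), it looks like this:

       #include <stdio.h>

       #define MAXBITS 15

       /* -1: over-subscribed, 0: complete, 1: incomplete */
       static int check_lengths(const unsigned short *count)
       {
           int left = 1;                 /* one empty prefix to start */
           unsigned len;
           for (len = 1; len <= MAXBITS; len++) {
               left <<= 1;               /* each unused prefix splits in two */
               left -= count[len];       /* codes of this length consume some */
               if (left < 0) return -1;
           }
           return left > 0 ? 1 : 0;
       }

       int main(void)
       {
           unsigned short ok[MAXBITS + 1] = {0};
           unsigned short bad[MAXBITS + 1] = {0};
           ok[1] = 1; ok[2] = 1; ok[3] = 2;   /* 0, 10, 110, 111 */
           bad[1] = 3;                        /* three 1-bit codes: impossible */
           printf("ok: %d, bad: %d\n", check_lengths(ok), check_lengths(bad));
           return 0;
       }

   inflate_table() additionally allows an incomplete code only when it is a
   single one-bit code, which is what the type == CODES || max != 1 test
   above encodes.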
This - routine permits incomplete codes, so another loop after this one fills - in the rest of the decoding tables with invalid code markers. - */ - - /* set up for code type */ - switch (type) { - case CODES: - base = extra = work; /* dummy value--not used */ - match = 20; - break; - case LENS: - base = lbase; - extra = lext; - match = 257; - break; - default: /* DISTS */ - base = dbase; - extra = dext; - match = 0; - } - - /* initialize state for loop */ - huff = 0; /* starting code */ - sym = 0; /* starting code symbol */ - len = min; /* starting code length */ - next = *table; /* current table to fill in */ - curr = root; /* current table index bits */ - drop = 0; /* current bits to drop from code for index */ - low = (unsigned)(-1); /* trigger new sub-table when len > root */ - used = 1U << root; /* use root table entries */ - mask = used - 1; /* mask for comparing low */ - - /* check available table space */ - if ((type == LENS && used > ENOUGH_LENS) || - (type == DISTS && used > ENOUGH_DISTS)) - return 1; - - /* process all codes and make table entries */ - for (;;) { - /* create table entry */ - here.bits = (unsigned char)(len - drop); - if (work[sym] + 1U < match) { - here.op = (unsigned char)0; - here.val = work[sym]; - } - else if (work[sym] >= match) { - here.op = (unsigned char)(extra[work[sym] - match]); - here.val = base[work[sym] - match]; - } - else { - here.op = (unsigned char)(32 + 64); /* end of block */ - here.val = 0; - } - - /* replicate for those indices with low len bits equal to huff */ - incr = 1U << (len - drop); - fill = 1U << curr; - min = fill; /* save offset to next table */ - do { - fill -= incr; - next[(huff >> drop) + fill] = here; - } while (fill != 0); - - /* backwards increment the len-bit code huff */ - incr = 1U << (len - 1); - while (huff & incr) - incr >>= 1; - if (incr != 0) { - huff &= incr - 1; - huff += incr; - } - else - huff = 0; - - /* go to next symbol, update count, len */ - sym++; - if (--(count[len]) == 0) { - if (len == max) break; - len = lens[work[sym]]; - } - - /* create new sub-table if needed */ - if (len > root && (huff & mask) != low) { - /* if first time, transition to sub-tables */ - if (drop == 0) - drop = root; - - /* increment past last table */ - next += min; /* here min is 1 << curr */ - - /* determine length of next table */ - curr = len - drop; - left = (int)(1 << curr); - while (curr + drop < max) { - left -= count[curr + drop]; - if (left <= 0) break; - curr++; - left <<= 1; - } - - /* check for enough space */ - used += 1U << curr; - if ((type == LENS && used > ENOUGH_LENS) || - (type == DISTS && used > ENOUGH_DISTS)) - return 1; - - /* point entry in root table to sub-table */ - low = huff & mask; - (*table)[low].op = (unsigned char)curr; - (*table)[low].bits = (unsigned char)root; - (*table)[low].val = (unsigned short)(next - *table); - } - } - - /* fill in remaining table entry if code is incomplete (guaranteed to have - at most one remaining entry, since if the code is incomplete, the - maximum code length that was allowed to get this far is one bit) */ - if (huff != 0) { - here.op = (unsigned char)64; /* invalid code marker */ - here.bits = (unsigned char)(len - drop); - here.val = (unsigned short)0; - next[huff] = here; - } - - /* set return parameters */ - *table += used; - *bits = root; - return 0; -} diff --git a/base/poco/Foundation/src/inftrees.h b/base/poco/Foundation/src/inftrees.h deleted file mode 100644 index 55407138bd6..00000000000 --- a/base/poco/Foundation/src/inftrees.h +++ /dev/null @@ -1,63 +0,0 
@@ -/* inftrees.h -- header to use inftrees.c - * Copyright (C) 1995-2005, 2010 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -/* Structure for decoding tables. Each entry provides either the - information needed to do the operation requested by the code that - indexed that table entry, or it provides a pointer to another - table that indexes more bits of the code. op indicates whether - the entry is a pointer to another table, a literal, a length or - distance, an end-of-block, or an invalid code. For a table - pointer, the low four bits of op is the number of index bits of - that table. For a length or distance, the low four bits of op - is the number of extra bits to get after the code. bits is - the number of bits in this code or part of the code to drop off - of the bit buffer. val is the actual byte to output in the case - of a literal, the base length or distance, or the offset from - the current table to the next table. Each entry is four bytes. */ -typedef struct -{ - unsigned char op; /* operation, extra bits, table bits */ - unsigned char bits; /* bits in this part of the code */ - unsigned short val; /* offset in table or code value */ -} code; - -/* op values as set by inflate_table(): - 00000000 - literal - 0000tttt - table link, tttt != 0 is the number of table index bits - 0001eeee - length or distance, eeee is the number of extra bits - 01100000 - end of block - 01000000 - invalid code - */ - -/* Maximum size of the dynamic table. The maximum number of code structures is - 1444, which is the sum of 852 for literal/length codes and 592 for distance - codes. These values were found by exhaustive searches using the program - examples/enough.c found in the zlib distribution. The arguments to that - program are the number of symbols, the initial root table size, and the - maximum bit length of a code. "enough 286 9 15" for literal/length codes - returns returns 852, and "enough 30 6 15" for distance codes returns 592. - The initial root table size (9 or 6) is found in the fifth argument of the - inflate_table() calls in inflate.c and infback.c. If the root table size is - changed, then these maximum sizes would be need to be recalculated and - updated. */ -#define ENOUGH_LENS 852 -#define ENOUGH_DISTS 592 -#define ENOUGH (ENOUGH_LENS + ENOUGH_DISTS) - -/* Type of code to build for inflate_table() */ -typedef enum -{ - CODES, - LENS, - DISTS -} codetype; - -int ZLIB_INTERNAL inflate_table - OF((codetype type, unsigned short FAR * lens, unsigned codes, code FAR * FAR * table, unsigned FAR * bits, unsigned short FAR * work)); diff --git a/base/poco/Foundation/src/pocomsg.mc b/base/poco/Foundation/src/pocomsg.mc deleted file mode 100644 index d1f6d6e7a8e..00000000000 --- a/base/poco/Foundation/src/pocomsg.mc +++ /dev/null @@ -1,87 +0,0 @@ -;// -;// pocomsg.mc[.h] -;// -;// The Poco message source/header file. -;// -;// NOTE: pocomsg.h is automatically generated from pocomsg.mc. -;// Never edit pocomsg.h directly! -;// -;// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -;// and Contributors. 
-;// -;// Permission is hereby granted, free of charge, to any person or organization -;// obtaining a copy of the software and accompanying documentation covered by -;// this license (the "Software") to use, reproduce, display, distribute, -;// execute, and transmit the Software, and to prepare derivative works of the -;// Software, and to permit third-parties to whom the Software is furnished to -;// do so, all subject to the following: -;// -;// The copyright notices in the Software and this entire statement, including -;// the above license grant, this restriction and the following disclaimer, -;// must be included in all copies of the Software, in whole or in part, and -;// all derivative works of the Software, unless such copies or derivative -;// works are solely in the form of machine-executable object code generated by -;// a source language processor. -;// -;// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -;// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -;// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -;// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -;// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -;// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -;// DEALINGS IN THE SOFTWARE. -;// - - -;// -;// Categories -;// -MessageId=0x1 -SymbolicName=POCO_CTG_FATAL -Language=English -Fatal -. -MessageId=0x2 -SymbolicName=POCO_CTG_CRITICAL -Language=English -Critical -. -MessageId=0x3 -SymbolicName=POCO_CTG_ERROR -Language=English -Error -. -MessageId=0x4 -SymbolicName=POCO_CTG_WARNING -Language=English -Warning -. -MessageId=0x5 -SymbolicName=POCO_CTG_NOTICE -Language=English -Notice -. -MessageId=0x6 -SymbolicName=POCO_CTG_INFORMATION -Language=English -Information -. -MessageId=0x7 -SymbolicName=POCO_CTG_DEBUG -Language=English -Debug -. -MessageId=0x8 -SymbolicName=POCO_CTG_TRACE -Language=English -Trace -. - -;// -;// Event Identifiers -;// -MessageId=0x1000 -SymbolicName=POCO_MSG_LOG -Language=English -%1 -. diff --git a/base/poco/Foundation/src/strtod.cc b/base/poco/Foundation/src/strtod.cc deleted file mode 100644 index 7c776943be5..00000000000 --- a/base/poco/Foundation/src/strtod.cc +++ /dev/null @@ -1,556 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include -#include - -#include "strtod.h" -#include "bignum.h" -#include "cached-powers.h" -#include "ieee.h" - -namespace double_conversion { - -// 2^53 = 9007199254740992. -// Any integer with at most 15 decimal digits will hence fit into a double -// (which has a 53bit significand) without loss of precision. -static const int kMaxExactDoubleIntegerDecimalDigits = 15; -// 2^64 = 18446744073709551616 > 10^19 -static const int kMaxUint64DecimalDigits = 19; - -// Max double: 1.7976931348623157 x 10^308 -// Min non-zero double: 4.9406564584124654 x 10^-324 -// Any x >= 10^309 is interpreted as +infinity. -// Any x <= 10^-324 is interpreted as 0. -// Note that 2.5e-324 (despite being smaller than the min double) will be read -// as non-zero (equal to the min non-zero double). -static const int kMaxDecimalPower = 309; -static const int kMinDecimalPower = -324; - -// 2^64 = 18446744073709551616 -static const uint64_t kMaxUint64 = UINT64_2PART_C(0xFFFFFFFF, FFFFFFFF); - - -static const double exact_powers_of_ten[] = { - 1.0, // 10^0 - 10.0, - 100.0, - 1000.0, - 10000.0, - 100000.0, - 1000000.0, - 10000000.0, - 100000000.0, - 1000000000.0, - 10000000000.0, // 10^10 - 100000000000.0, - 1000000000000.0, - 10000000000000.0, - 100000000000000.0, - 1000000000000000.0, - 10000000000000000.0, - 100000000000000000.0, - 1000000000000000000.0, - 10000000000000000000.0, - 100000000000000000000.0, // 10^20 - 1000000000000000000000.0, - // 10^22 = 0x21e19e0c9bab2400000 = 0x878678326eac9 * 2^22 - 10000000000000000000000.0 -}; -static const int kExactPowersOfTenSize = ARRAY_SIZE(exact_powers_of_ten); - -// Maximum number of significant digits in the decimal representation. -// In fact the value is 772 (see conversions.cc), but to give us some margin -// we round up to 780. -static const int kMaxSignificantDecimalDigits = 780; - -static Vector TrimLeadingZeros(Vector buffer) { - for (int i = 0; i < buffer.length(); i++) { - if (buffer[i] != '0') { - return buffer.SubVector(i, buffer.length()); - } - } - return Vector(buffer.start(), 0); -} - - -static Vector TrimTrailingZeros(Vector buffer) { - for (int i = buffer.length() - 1; i >= 0; --i) { - if (buffer[i] != '0') { - return buffer.SubVector(0, i + 1); - } - } - return Vector(buffer.start(), 0); -} - - -static void CutToMaxSignificantDigits(Vector buffer, - int exponent, - char* significant_buffer, - int* significant_exponent) { - for (int i = 0; i < kMaxSignificantDecimalDigits - 1; ++i) { - significant_buffer[i] = buffer[i]; - } - // The input buffer has been trimmed. Therefore the last digit must be - // different from '0'. - ASSERT(buffer[buffer.length() - 1] != '0'); - // Set the last digit to be non-zero. This is sufficient to guarantee - // correct rounding. - significant_buffer[kMaxSignificantDecimalDigits - 1] = '1'; - *significant_exponent = - exponent + (buffer.length() - kMaxSignificantDecimalDigits); -} - - -// Trims the buffer and cuts it to at most kMaxSignificantDecimalDigits. 
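The two bounds quoted above, 15 exact decimal digits and 10^22 as the last exact power of ten, can be verified with a standalone demo (not part of the deleted file):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* any 15-decimal-digit integer fits the 53-bit significand exactly */
        uint64_t n = 999999999999999ULL;              /* largest 15-digit value */
        printf("%d\n", (uint64_t)(double)n == n);     /* 1: round-trips exactly */

        /* 1e22 is the last power of ten that is itself an exact double */
        printf("%.1f\n", 1e22);  /* 10000000000000000000000.0 */
        printf("%.1f\n", 1e23);  /* 99999999999999991611392.0: nearest double */
        return 0;
    }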
-// If possible the input-buffer is reused, but if the buffer needs to be -// modified (due to cutting), then the input needs to be copied into the -// buffer_copy_space. -static void TrimAndCut(Vector buffer, int exponent, - char* buffer_copy_space, int space_size, - Vector* trimmed, int* updated_exponent) { - Vector left_trimmed = TrimLeadingZeros(buffer); - Vector right_trimmed = TrimTrailingZeros(left_trimmed); - exponent += left_trimmed.length() - right_trimmed.length(); - if (right_trimmed.length() > kMaxSignificantDecimalDigits) { - (void) space_size; // Mark variable as used. - ASSERT(space_size >= kMaxSignificantDecimalDigits); - CutToMaxSignificantDigits(right_trimmed, exponent, - buffer_copy_space, updated_exponent); - *trimmed = Vector(buffer_copy_space, - kMaxSignificantDecimalDigits); - } else { - *trimmed = right_trimmed; - *updated_exponent = exponent; - } -} - - -// Reads digits from the buffer and converts them to a uint64. -// Reads in as many digits as fit into a uint64. -// When the string starts with "1844674407370955161" no further digit is read. -// Since 2^64 = 18446744073709551616 it would still be possible read another -// digit if it was less or equal than 6, but this would complicate the code. -static uint64_t ReadUint64(Vector buffer, - int* number_of_read_digits) { - uint64_t result = 0; - int i = 0; - while (i < buffer.length() && result <= (kMaxUint64 / 10 - 1)) { - int digit = buffer[i++] - '0'; - ASSERT(0 <= digit && digit <= 9); - result = 10 * result + digit; - } - *number_of_read_digits = i; - return result; -} - - -// Reads a DiyFp from the buffer. -// The returned DiyFp is not necessarily normalized. -// If remaining_decimals is zero then the returned DiyFp is accurate. -// Otherwise it has been rounded and has error of at most 1/2 ulp. -static void ReadDiyFp(Vector buffer, - DiyFp* result, - int* remaining_decimals) { - int read_digits; - uint64_t significand = ReadUint64(buffer, &read_digits); - if (buffer.length() == read_digits) { - *result = DiyFp(significand, 0); - *remaining_decimals = 0; - } else { - // Round the significand. - if (buffer[read_digits] >= '5') { - significand++; - } - // Compute the binary exponent. - int exponent = 0; - *result = DiyFp(significand, exponent); - *remaining_decimals = buffer.length() - read_digits; - } -} - - -static bool DoubleStrtod(Vector trimmed, - int exponent, - double* result) { -#if !defined(DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS) - // On x86 the floating-point stack can be 64 or 80 bits wide. If it is - // 80 bits wide (as is the case on Linux) then double-rounding occurs and the - // result is not accurate. - // We know that Windows32 uses 64 bits and is therefore accurate. - // Note that the ARM simulator is compiled for 32bits. It therefore exhibits - // the same problem. - return false; -#endif - if (trimmed.length() <= kMaxExactDoubleIntegerDecimalDigits) { - int read_digits; - // The trimmed input fits into a double. - // If the 10^exponent (resp. 10^-exponent) fits into a double too then we - // can compute the result-double simply by multiplying (resp. dividing) the - // two numbers. - // This is possible because IEEE guarantees that floating-point operations - // return the best possible approximation. - if (exponent < 0 && -exponent < kExactPowersOfTenSize) { - // 10^-exponent fits into a double. 
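The fast path that DoubleStrtod's comments describe reduces to a single correctly-rounded multiply or divide. A minimal restatement, assuming digits has at most 15 decimal digits so the integer-to-double conversion is exact, and assuming 64-bit double arithmetic (the #if above rules out x87 extended precision); fast_path is an illustrative name:

    #include <stdint.h>

    static int fast_path(uint64_t digits, int exponent, double * result)
    {
        static const double exact_pow10[] = {
            1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,
            1e8,  1e9,  1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
            1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22
        };
        if (exponent >= 0 && exponent <= 22) {
            *result = (double)digits * exact_pow10[exponent];  /* one rounding */
            return 1;
        }
        if (exponent < 0 && exponent >= -22) {
            *result = (double)digits / exact_pow10[-exponent]; /* one rounding */
            return 1;
        }
        return 0; /* caller falls back to the slower, exact algorithms */
    }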
- *result = static_cast<double>(ReadUint64(trimmed, &read_digits)); - ASSERT(read_digits == trimmed.length()); - *result /= exact_powers_of_ten[-exponent]; - return true; - } - if (0 <= exponent && exponent < kExactPowersOfTenSize) { - // 10^exponent fits into a double. - *result = static_cast<double>(ReadUint64(trimmed, &read_digits)); - ASSERT(read_digits == trimmed.length()); - *result *= exact_powers_of_ten[exponent]; - return true; - } - int remaining_digits = - kMaxExactDoubleIntegerDecimalDigits - trimmed.length(); - if ((0 <= exponent) && - (exponent - remaining_digits < kExactPowersOfTenSize)) { - // The trimmed string was short and we can multiply it with - // 10^remaining_digits. As a result the remaining exponent now fits - // into a double too. - *result = static_cast<double>(ReadUint64(trimmed, &read_digits)); - ASSERT(read_digits == trimmed.length()); - *result *= exact_powers_of_ten[remaining_digits]; - *result *= exact_powers_of_ten[exponent - remaining_digits]; - return true; - } - } - return false; -} - - -// Returns 10^exponent as an exact DiyFp. -// The given exponent must be in the range [1; kDecimalExponentDistance[. -static DiyFp AdjustmentPowerOfTen(int exponent) { - ASSERT(0 < exponent); - ASSERT(exponent < PowersOfTenCache::kDecimalExponentDistance); - // Simply hardcode the remaining powers for the given decimal exponent - // distance. - ASSERT(PowersOfTenCache::kDecimalExponentDistance == 8); - switch (exponent) { - case 1: return DiyFp(UINT64_2PART_C(0xa0000000, 00000000), -60); - case 2: return DiyFp(UINT64_2PART_C(0xc8000000, 00000000), -57); - case 3: return DiyFp(UINT64_2PART_C(0xfa000000, 00000000), -54); - case 4: return DiyFp(UINT64_2PART_C(0x9c400000, 00000000), -50); - case 5: return DiyFp(UINT64_2PART_C(0xc3500000, 00000000), -47); - case 6: return DiyFp(UINT64_2PART_C(0xf4240000, 00000000), -44); - case 7: return DiyFp(UINT64_2PART_C(0x98968000, 00000000), -40); - default: - UNREACHABLE(); - return DiyFp(0, 0); - } -} - - -// If the function returns true then the result is the correct double. -// Otherwise it is either the correct double or the double that is just below -// the correct double. -static bool DiyFpStrtod(Vector<const char> buffer, - int exponent, - double* result) { - DiyFp input; - int remaining_decimals; - ReadDiyFp(buffer, &input, &remaining_decimals); - // Since we may have dropped some digits the input is not accurate. - // If remaining_decimals is different from 0 then the error is at most - // .5 ulp (unit in the last place). - // We don't want to deal with fractions and therefore keep a common - // denominator. - const int kDenominatorLog = 3; - const int kDenominator = 1 << kDenominatorLog; - // Move the remaining decimals into the exponent. - exponent += remaining_decimals; - int error = (remaining_decimals == 0 ?
0 : kDenominator / 2); - - int old_e = input.e(); - input.Normalize(); - error <<= old_e - input.e(); - - ASSERT(exponent <= PowersOfTenCache::kMaxDecimalExponent); - if (exponent < PowersOfTenCache::kMinDecimalExponent) { - *result = 0.0; - return true; - } - DiyFp cached_power; - int cached_decimal_exponent; - PowersOfTenCache::GetCachedPowerForDecimalExponent(exponent, - &cached_power, - &cached_decimal_exponent); - - if (cached_decimal_exponent != exponent) { - int adjustment_exponent = exponent - cached_decimal_exponent; - DiyFp adjustment_power = AdjustmentPowerOfTen(adjustment_exponent); - input.Multiply(adjustment_power); - if (kMaxUint64DecimalDigits - buffer.length() >= adjustment_exponent) { - // The product of input with the adjustment power fits into a 64 bit - // integer. - ASSERT(DiyFp::kSignificandSize == 64); - } else { - // The adjustment power is exact. There is hence only an error of 0.5. - error += kDenominator / 2; - } - } - - input.Multiply(cached_power); - // The error introduced by a multiplication of a*b equals - // error_a + error_b + error_a*error_b/2^64 + 0.5 - // Substituting a with 'input' and b with 'cached_power' we have - // error_b = 0.5 (all cached powers have an error of less than 0.5 ulp), - // error_ab = 0 or 1 / kDenominator > error_a*error_b/ 2^64 - int error_b = kDenominator / 2; - int error_ab = (error == 0 ? 0 : 1); // We round up to 1. - int fixed_error = kDenominator / 2; - error += error_b + error_ab + fixed_error; - - old_e = input.e(); - input.Normalize(); - error <<= old_e - input.e(); - - // See if the double's significand changes if we add/subtract the error. - int order_of_magnitude = DiyFp::kSignificandSize + input.e(); - int effective_significand_size = - Double::SignificandSizeForOrderOfMagnitude(order_of_magnitude); - int precision_digits_count = - DiyFp::kSignificandSize - effective_significand_size; - if (precision_digits_count + kDenominatorLog >= DiyFp::kSignificandSize) { - // This can only happen for very small denormals. In this case the - // half-way multiplied by the denominator exceeds the range of an uint64. - // Simply shift everything to the right. - int shift_amount = (precision_digits_count + kDenominatorLog) - - DiyFp::kSignificandSize + 1; - input.set_f(input.f() >> shift_amount); - input.set_e(input.e() + shift_amount); - // We add 1 for the lost precision of error, and kDenominator for - // the lost precision of input.f(). - error = (error >> shift_amount) + 1 + kDenominator; - precision_digits_count -= shift_amount; - } - // We use uint64_ts now. This only works if the DiyFp uses uint64_ts too. - ASSERT(DiyFp::kSignificandSize == 64); - ASSERT(precision_digits_count < 64); - uint64_t one64 = 1; - uint64_t precision_bits_mask = (one64 << precision_digits_count) - 1; - uint64_t precision_bits = input.f() & precision_bits_mask; - uint64_t half_way = one64 << (precision_digits_count - 1); - precision_bits *= kDenominator; - half_way *= kDenominator; - DiyFp rounded_input(input.f() >> precision_digits_count, - input.e() + precision_digits_count); - if (precision_bits >= half_way + error) { - rounded_input.set_f(rounded_input.f() + 1); - } - // If the last_bits are too close to the half-way case than we are too - // inaccurate and round down. In this case we return false so that we can - // fall back to a more precise algorithm. - - *result = Double(rounded_input).value(); - if (half_way - error < precision_bits && precision_bits < half_way + error) { - // Too imprecise. 
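The comparison that decides between returning true and false above can be read in isolation as follows; all quantities are pre-scaled by kDenominator == 8 as in the code, and rounding_is_certain is an illustrative name, not the library's API:

    #include <stdint.h>

    /* Trust the guess only if the discarded bits are provably on one side
     * of the halfway point even after accounting for the error bound. */
    static int rounding_is_certain(uint64_t precision_bits,
                                   uint64_t half_way,
                                   uint64_t error)
    {
        return precision_bits >= half_way + error    /* certainly round up */
            || precision_bits + error <= half_way;   /* certainly round down */
    }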
The caller will have to fall back to a slower version. - // However the returned number is guaranteed to be either the correct - // double, or the next-lower double. - return false; - } else { - return true; - } -} - - -// Returns -// - -1 if buffer*10^exponent < diy_fp. -// - 0 if buffer*10^exponent == diy_fp. -// - +1 if buffer*10^exponent > diy_fp. -// Preconditions: -// buffer.length() + exponent <= kMaxDecimalPower + 1 -// buffer.length() + exponent > kMinDecimalPower -// buffer.length() <= kMaxDecimalSignificantDigits -static int CompareBufferWithDiyFp(Vector buffer, - int exponent, - DiyFp diy_fp) { - ASSERT(buffer.length() + exponent <= kMaxDecimalPower + 1); - ASSERT(buffer.length() + exponent > kMinDecimalPower); - ASSERT(buffer.length() <= kMaxSignificantDecimalDigits); - // Make sure that the Bignum will be able to hold all our numbers. - // Our Bignum implementation has a separate field for exponents. Shifts will - // consume at most one bigit (< 64 bits). - // ln(10) == 3.3219... - ASSERT(((kMaxDecimalPower + 1) * 333 / 100) < Bignum::kMaxSignificantBits); - Bignum buffer_bignum; - Bignum diy_fp_bignum; - buffer_bignum.AssignDecimalString(buffer); - diy_fp_bignum.AssignUInt64(diy_fp.f()); - if (exponent >= 0) { - buffer_bignum.MultiplyByPowerOfTen(exponent); - } else { - diy_fp_bignum.MultiplyByPowerOfTen(-exponent); - } - if (diy_fp.e() > 0) { - diy_fp_bignum.ShiftLeft(diy_fp.e()); - } else { - buffer_bignum.ShiftLeft(-diy_fp.e()); - } - return Bignum::Compare(buffer_bignum, diy_fp_bignum); -} - - -// Returns true if the guess is the correct double. -// Returns false, when guess is either correct or the next-lower double. -static bool ComputeGuess(Vector trimmed, int exponent, - double* guess) { - if (trimmed.length() == 0) { - *guess = 0.0; - return true; - } - if (exponent + trimmed.length() - 1 >= kMaxDecimalPower) { - *guess = Double::Infinity(); - return true; - } - if (exponent + trimmed.length() <= kMinDecimalPower) { - *guess = 0.0; - return true; - } - - if (DoubleStrtod(trimmed, exponent, guess) || - DiyFpStrtod(trimmed, exponent, guess)) { - return true; - } - if (*guess == Double::Infinity()) { - return true; - } - return false; -} - -double Strtod(Vector buffer, int exponent) { - char copy_buffer[kMaxSignificantDecimalDigits]; - Vector trimmed; - int updated_exponent; - TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits, - &trimmed, &updated_exponent); - exponent = updated_exponent; - - double guess; - bool is_correct = ComputeGuess(trimmed, exponent, &guess); - if (is_correct) return guess; - - DiyFp upper_boundary = Double(guess).UpperBoundary(); - int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary); - if (comparison < 0) { - return guess; - } else if (comparison > 0) { - return Double(guess).NextDouble(); - } else if ((Double(guess).Significand() & 1) == 0) { - // Round towards even. - return guess; - } else { - return Double(guess).NextDouble(); - } -} - -float Strtof(Vector buffer, int exponent) { - char copy_buffer[kMaxSignificantDecimalDigits]; - Vector trimmed; - int updated_exponent; - TrimAndCut(buffer, exponent, copy_buffer, kMaxSignificantDecimalDigits, - &trimmed, &updated_exponent); - exponent = updated_exponent; - - double double_guess; - bool is_correct = ComputeGuess(trimmed, exponent, &double_guess); - - float float_guess = static_cast(double_guess); - if (float_guess == double_guess) { - // This shortcut triggers for integer values. - return float_guess; - } - - // We must catch double-rounding. 
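The last four branches of Strtod above implement IEEE round-half-to-even against the upper boundary. The same decision, self-contained: cmp stands for the sign of the Bignum comparison, and nextafter stands in for Double::NextDouble (a sketch, not the library's code):

    #include <math.h>
    #include <stdint.h>
    #include <string.h>

    static double adjudicate(int cmp, double guess)
    {
        double next = nextafter(guess, INFINITY);
        if (cmp < 0) return guess;           /* input below the boundary */
        if (cmp > 0) return next;            /* input above the boundary */
        uint64_t bits;                       /* exact tie: prefer the value */
        memcpy(&bits, &guess, sizeof bits);  /* with an even significand    */
        return (bits & 1) == 0 ? guess : next;
    }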
Say the double has been rounded up, and is - // now a boundary of a float, and rounds up again. This is why we have to - // look at previous too. - // Example (in decimal numbers): - // input: 12349 - // high-precision (4 digits): 1235 - // low-precision (3 digits): - // when read from input: 123 - // when rounded from high precision: 124. - // To do this we simply look at the neighbors of the correct result and see - // if they would round to the same float. If the guess is not correct we have - // to look at four values (since two different doubles could be the correct - // double). - - double double_next = Double(double_guess).NextDouble(); - double double_previous = Double(double_guess).PreviousDouble(); - - float f1 = static_cast(double_previous); - float f2 = float_guess; - float f3 = static_cast(double_next); - float f4; - if (is_correct) { - f4 = f3; - } else { - double double_next2 = Double(double_next).NextDouble(); - f4 = static_cast(double_next2); - } - (void) f2; // Mark variable as used. - ASSERT(f1 <= f2 && f2 <= f3 && f3 <= f4); - - // If the guess doesn't lie near a single-precision boundary we can simply - // return its float-value. - if (f1 == f4) { - return float_guess; - } - - ASSERT((f1 != f2 && f2 == f3 && f3 == f4) || - (f1 == f2 && f2 != f3 && f3 == f4) || - (f1 == f2 && f2 == f3 && f3 != f4)); - - // guess and next are the two possible candidates (in the same way that - // double_guess was the lower candidate for a double-precision guess). - float guess = f1; - float next = f4; - DiyFp upper_boundary; - if (guess == 0.0f) { - float min_float = 1e-45f; - upper_boundary = Double(static_cast(min_float) / 2).AsDiyFp(); - } else { - upper_boundary = Single(guess).UpperBoundary(); - } - int comparison = CompareBufferWithDiyFp(trimmed, exponent, upper_boundary); - if (comparison < 0) { - return guess; - } else if (comparison > 0) { - return next; - } else if ((Single(guess).Significand() & 1) == 0) { - // Round towards even. - return guess; - } else { - return next; - } -} - -} // namespace double_conversion diff --git a/base/poco/Foundation/src/strtod.h b/base/poco/Foundation/src/strtod.h deleted file mode 100644 index 66f90253e32..00000000000 --- a/base/poco/Foundation/src/strtod.h +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
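The decimal example above can be replayed mechanically; rounding 12349 in two steps lands one unit higher than rounding once (round-half-up in both steps, standalone demo):

    #include <stdio.h>

    static long round_to(long value, long unit)
    {
        return (value + unit / 2) / unit * unit;   /* round-half-up */
    }

    int main(void)
    {
        long x = 12349;
        printf("%ld\n", round_to(x, 100));               /* 12300: rounded once  */
        printf("%ld\n", round_to(round_to(x, 10), 100)); /* 12400: rounded twice */
        return 0;
    }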
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_STRTOD_H_ -#define DOUBLE_CONVERSION_STRTOD_H_ - -#include "utils.h" - -namespace double_conversion -{ - -// The buffer must only contain digits in the range [0-9]. It must not -// contain a dot or a sign. It must not start with '0', and must not be empty. -double Strtod(Vector buffer, int exponent); - -// The buffer must only contain digits in the range [0-9]. It must not -// contain a dot or a sign. It must not start with '0', and must not be empty. -float Strtof(Vector buffer, int exponent); - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_STRTOD_H_ diff --git a/base/poco/Foundation/src/trees.c b/base/poco/Foundation/src/trees.c deleted file mode 100644 index b9d998f1b32..00000000000 --- a/base/poco/Foundation/src/trees.c +++ /dev/null @@ -1,1203 +0,0 @@ -/* trees.c -- output deflated data using Huffman coding - * Copyright (C) 1995-2017 Jean-loup Gailly - * detect_data_type() function provided freely by Cosmin Truta, 2006 - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* - * ALGORITHM - * - * The "deflation" process uses several Huffman trees. The more - * common source values are represented by shorter bit sequences. - * - * Each code tree is stored in a compressed form which is itself - * a Huffman encoding of the lengths of all the code strings (in - * ascending order by source values). The actual code strings are - * reconstructed from the lengths in the inflate process, as described - * in the deflate specification. - * - * REFERENCES - * - * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification". - * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc - * - * Storer, James A. - * Data Compression: Methods and Theory, pp. 49-50. - * Computer Science Press, 1988. ISBN 0-7167-8156-5. - * - * Sedgewick, R. - * Algorithms, p290. - * Addison-Wesley, 1983. ISBN 0-201-06672-6. 
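As the ALGORITHM note above says, the code strings are reconstructed from the lengths alone. The canonical assignment is small enough to show standalone; it is the same scheme RFC 1951 prescribes and gen_codes() implements further down in this file (a sketch, not zlib's code):

    #define MAX_BITS 15

    /* Hand out consecutive code values per bit length, shorter lengths
     * first; assumes every len[i] is in the range 0..MAX_BITS. */
    static void lengths_to_codes(const int * len, int n, unsigned * out)
    {
        int bl_count[MAX_BITS + 1] = {0};
        unsigned next_code[MAX_BITS + 1] = {0};
        unsigned code = 0;
        int bits, i;

        for (i = 0; i < n; i++) bl_count[len[i]]++;
        bl_count[0] = 0;
        for (bits = 1; bits <= MAX_BITS; bits++) {
            code = (code + bl_count[bits - 1]) << 1;
            next_code[bits] = code;
        }
        for (i = 0; i < n; i++)
            out[i] = len[i] ? next_code[len[i]]++ : 0;
    }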
- */ - -/* @(#) $Id$ */ - -/* #define GEN_TREES_H */ - -#include "deflate.h" - -#ifdef ZLIB_DEBUG -# include -#endif - -/* =========================================================================== - * Constants - */ - -#define MAX_BL_BITS 7 -/* Bit length codes must not exceed MAX_BL_BITS bits */ - -#define END_BLOCK 256 -/* end of block literal code */ - -#define REP_3_6 16 -/* repeat previous bit length 3-6 times (2 bits of repeat count) */ - -#define REPZ_3_10 17 -/* repeat a zero length 3-10 times (3 bits of repeat count) */ - -#define REPZ_11_138 18 -/* repeat a zero length 11-138 times (7 bits of repeat count) */ - -local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ - = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; - -local const int extra_dbits[D_CODES] /* extra bits for each distance code */ - = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; - -local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ - = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; - -local const uch bl_order[BL_CODES] - = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; -/* The lengths of the bit length codes are sent in order of decreasing - * probability, to avoid transmitting the lengths for unused bit length codes. - */ - -/* =========================================================================== - * Local data. These are initialized only once. - */ - -#define DIST_CODE_LEN 512 /* see definition of array dist_code below */ - -#if defined(GEN_TREES_H) || !defined(STDC) -/* non ANSI compilers may not accept trees.h */ - -local ct_data static_ltree[L_CODES+2]; -/* The static literal tree. Since the bit lengths are imposed, there is no - * need for the L_CODES extra codes used during heap construction. However - * The codes 286 and 287 are needed to build a canonical tree (see _tr_init - * below). - */ - -local ct_data static_dtree[D_CODES]; -/* The static distance tree. (Actually a trivial tree since all codes use - * 5 bits.) - */ - -uch _dist_code[DIST_CODE_LEN]; -/* Distance codes. The first 256 values correspond to the distances - * 3 .. 258, the last 256 values correspond to the top 8 bits of - * the 15 bit distances. - */ - -uch _length_code[MAX_MATCH-MIN_MATCH+1]; -/* length code for each normalized match length (0 == MIN_MATCH) */ - -local int base_length[LENGTH_CODES]; -/* First normalized length for each code (0 = MIN_MATCH) */ - -local int base_dist[D_CODES]; -/* First normalized distance for each code (0 = distance of 1) */ - -#else -# include "trees.h" -#endif /* GEN_TREES_H */ - -struct static_tree_desc_s { - const ct_data *static_tree; /* static tree or NULL */ - const intf *extra_bits; /* extra bits for each code or NULL */ - int extra_base; /* base index for extra_bits */ - int elems; /* max number of elements in the tree */ - int max_length; /* max bit length for the codes */ -}; - -local const static_tree_desc static_l_desc = -{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; - -local const static_tree_desc static_d_desc = -{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; - -local const static_tree_desc static_bl_desc = -{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; - -/* =========================================================================== - * Local (static) routines in this file. 
- */ - -local void tr_static_init OF((void)); -local void init_block OF((deflate_state *s)); -local void pqdownheap OF((deflate_state *s, ct_data *tree, int k)); -local void gen_bitlen OF((deflate_state *s, tree_desc *desc)); -local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count)); -local void build_tree OF((deflate_state *s, tree_desc *desc)); -local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); -local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); -local int build_bl_tree OF((deflate_state *s)); -local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, - int blcodes)); -local void compress_block OF((deflate_state *s, const ct_data *ltree, - const ct_data *dtree)); -local int detect_data_type OF((deflate_state *s)); -local unsigned bi_reverse OF((unsigned value, int length)); -local void bi_windup OF((deflate_state *s)); -local void bi_flush OF((deflate_state *s)); - -#ifdef GEN_TREES_H -local void gen_trees_header OF((void)); -#endif - -#ifndef ZLIB_DEBUG -# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) - /* Send a code of the given tree. c and tree must not have side effects */ - -#else /* !ZLIB_DEBUG */ -# define send_code(s, c, tree) \ - { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ - send_bits(s, tree[c].Code, tree[c].Len); } -#endif - -/* =========================================================================== - * Output a short LSB first on the stream. - * IN assertion: there is enough room in pendingBuf. - */ -#define put_short(s, w) { \ - put_byte(s, (uch)((w) & 0xff)); \ - put_byte(s, (uch)((ush)(w) >> 8)); \ -} - -/* =========================================================================== - * Send a value on a given number of bits. - * IN assertion: length <= 16 and value fits in length bits. - */ -#ifdef ZLIB_DEBUG -local void send_bits OF((deflate_state *s, int value, int length)); - -local void send_bits(s, value, length) - deflate_state *s; - int value; /* value to send */ - int length; /* number of bits */ -{ - Tracevv((stderr," l %2d v %4x ", length, value)); - Assert(length > 0 && length <= 15, "invalid length"); - s->bits_sent += (ulg)length; - - /* If not enough room in bi_buf, use (valid) bits from bi_buf and - * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) - * unused bits in value. - */ - if (s->bi_valid > (int)Buf_size - length) { - s->bi_buf |= (ush)value << s->bi_valid; - put_short(s, s->bi_buf); - s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); - s->bi_valid += length - Buf_size; - } else { - s->bi_buf |= (ush)value << s->bi_valid; - s->bi_valid += length; - } -} -#else /* !ZLIB_DEBUG */ - -#define send_bits(s, value, length) \ -{ int len = length;\ - if (s->bi_valid > (int)Buf_size - len) {\ - int val = (int)value;\ - s->bi_buf |= (ush)val << s->bi_valid;\ - put_short(s, s->bi_buf);\ - s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ - s->bi_valid += len - Buf_size;\ - } else {\ - s->bi_buf |= (ush)(value) << s->bi_valid;\ - s->bi_valid += len;\ - }\ -} -#endif /* ZLIB_DEBUG */ - - -/* the arguments must not have side effects */ - -/* =========================================================================== - * Initialize the various 'constant' tables. 
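The 16-bit output buffering that send_bits implements above can be exercised on its own; Buf_size is 16, and the names here are illustrative demo names, not zlib's:

    #include <stdio.h>

    #define BUF_SIZE 16 /* Buf_size in deflate */

    static unsigned short bi_buf = 0; /* bit buffer */
    static int bi_valid = 0;          /* bits currently in bi_buf */

    static void send_bits_demo(int value, int length)
    {
        if (bi_valid > BUF_SIZE - length) {
            bi_buf |= (unsigned short)(value << bi_valid);
            printf("flush 0x%04x\n", bi_buf); /* stands in for put_short() */
            bi_buf = (unsigned short)(value >> (BUF_SIZE - bi_valid));
            bi_valid += length - BUF_SIZE;
        } else {
            bi_buf |= (unsigned short)(value << bi_valid);
            bi_valid += length;
        }
    }

    int main(void)
    {
        send_bits_demo(0x5, 3);    /* 3 bits pending */
        send_bits_demo(0x3ff, 10); /* 13 bits pending */
        send_bits_demo(0x7f, 7);   /* would exceed 16: one flush */
        printf("pending 0x%04x (%d bits)\n", bi_buf, bi_valid);
        return 0;
    }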
- */ -local void tr_static_init() -{ -#if defined(GEN_TREES_H) || !defined(STDC) - static int static_init_done = 0; - int n; /* iterates over tree elements */ - int bits; /* bit counter */ - int length; /* length value */ - int code; /* code value */ - int dist; /* distance index */ - ush bl_count[MAX_BITS+1]; - /* number of codes at each bit length for an optimal tree */ - - if (static_init_done) return; - - /* For some embedded targets, global variables are not initialized: */ -#ifdef NO_INIT_GLOBAL_POINTERS - static_l_desc.static_tree = static_ltree; - static_l_desc.extra_bits = extra_lbits; - static_d_desc.static_tree = static_dtree; - static_d_desc.extra_bits = extra_dbits; - static_bl_desc.extra_bits = extra_blbits; -#endif - - /* Initialize the mapping length (0..255) -> length code (0..28) */ - length = 0; - for (code = 0; code < LENGTH_CODES-1; code++) { - base_length[code] = length; - for (n = 0; n < (1<<extra_lbits[code]); n++) { - _length_code[length++] = (uch)code; - } - } - Assert (length == 256, "tr_static_init: length != 256"); - /* Note that the length 255 (match length 258) can be represented - * in two different ways: code 284 + 5 bits or code 285, so we - * overwrite length_code[255] to use the best encoding: - */ - _length_code[length-1] = (uch)code; - - /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ - dist = 0; - for (code = 0 ; code < 16; code++) { - base_dist[code] = dist; - for (n = 0; n < (1<<extra_dbits[code]); n++) { - _dist_code[dist++] = (uch)code; - } - } - Assert (dist == 256, "tr_static_init: dist != 256"); - dist >>= 7; /* from now on, all distances are divided by 128 */ - for ( ; code < D_CODES; code++) { - base_dist[code] = dist << 7; - for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) { - _dist_code[256 + dist++] = (uch)code; - } - } - Assert (dist == 256, "tr_static_init: 256+dist != 512"); - - /* Construct the codes of the static literal tree */ - for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; - n = 0; - while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; - while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; - while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; - while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; - /* Codes 286 and 287 do not exist, but we must include them in the - * tree construction to get a canonical Huffman tree (longest code - * all ones) - */ - gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); - - /* The static distance tree is trivial: */ - for (n = 0; n < D_CODES; n++) { - static_dtree[n].Len = 5; - static_dtree[n].Code = bi_reverse((unsigned)n, 5); - } - static_init_done = 1; - -# ifdef GEN_TREES_H - gen_trees_header(); -# endif -#endif /* defined(GEN_TREES_H) || !defined(STDC) */ -} - -/* =========================================================================== - * Generate the file trees.h describing the static trees. - */ -#ifdef GEN_TREES_H -# ifndef ZLIB_DEBUG -# include <stdio.h> -# endif - -# define SEPARATOR(i, last, width) \ - ((i) == (last)? "\n};\n\n" : \ - ((i) % (width) == (width)-1 ?
",\n" : ", ")) - -void gen_trees_header() -{ - FILE *header = fopen("trees.h", "w"); - int i; - - Assert (header != NULL, "Can't open trees.h"); - fprintf(header, - "/* header created automatically with -DGEN_TREES_H */\n\n"); - - fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); - for (i = 0; i < L_CODES+2; i++) { - fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, - static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); - } - - fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); - for (i = 0; i < D_CODES; i++) { - fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, - static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); - } - - fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); - for (i = 0; i < DIST_CODE_LEN; i++) { - fprintf(header, "%2u%s", _dist_code[i], - SEPARATOR(i, DIST_CODE_LEN-1, 20)); - } - - fprintf(header, - "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); - for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { - fprintf(header, "%2u%s", _length_code[i], - SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); - } - - fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); - for (i = 0; i < LENGTH_CODES; i++) { - fprintf(header, "%1u%s", base_length[i], - SEPARATOR(i, LENGTH_CODES-1, 20)); - } - - fprintf(header, "local const int base_dist[D_CODES] = {\n"); - for (i = 0; i < D_CODES; i++) { - fprintf(header, "%5u%s", base_dist[i], - SEPARATOR(i, D_CODES-1, 10)); - } - - fclose(header); -} -#endif /* GEN_TREES_H */ - -/* =========================================================================== - * Initialize the tree data structures for a new zlib stream. - */ -void ZLIB_INTERNAL _tr_init(s) - deflate_state *s; -{ - tr_static_init(); - - s->l_desc.dyn_tree = s->dyn_ltree; - s->l_desc.stat_desc = &static_l_desc; - - s->d_desc.dyn_tree = s->dyn_dtree; - s->d_desc.stat_desc = &static_d_desc; - - s->bl_desc.dyn_tree = s->bl_tree; - s->bl_desc.stat_desc = &static_bl_desc; - - s->bi_buf = 0; - s->bi_valid = 0; -#ifdef ZLIB_DEBUG - s->compressed_len = 0L; - s->bits_sent = 0L; -#endif - - /* Initialize the first block of the first file: */ - init_block(s); -} - -/* =========================================================================== - * Initialize a new block. - */ -local void init_block(s) - deflate_state *s; -{ - int n; /* iterates over tree elements */ - - /* Initialize the trees. */ - for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; - for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; - for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; - - s->dyn_ltree[END_BLOCK].Freq = 1; - s->opt_len = s->static_len = 0L; - s->last_lit = s->matches = 0; -} - -#define SMALLEST 1 -/* Index within the heap array of least frequent node in the Huffman tree */ - - -/* =========================================================================== - * Remove the smallest element from the heap and recreate the heap with - * one less element. Updates heap and heap_len. - */ -#define pqremove(s, tree, top) \ -{\ - top = s->heap[SMALLEST]; \ - s->heap[SMALLEST] = s->heap[s->heap_len--]; \ - pqdownheap(s, tree, SMALLEST); \ -} - -/* =========================================================================== - * Compares to subtrees, using the tree depth as tie breaker when - * the subtrees have equal frequency. This minimizes the worst case length. 
- */ -#define smaller(tree, n, m, depth) \ - (tree[n].Freq < tree[m].Freq || \ - (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) - -/* =========================================================================== - * Restore the heap property by moving down the tree starting at node k, - * exchanging a node with the smallest of its two sons if necessary, stopping - * when the heap property is re-established (each father smaller than its - * two sons). - */ -local void pqdownheap(s, tree, k) - deflate_state *s; - ct_data *tree; /* the tree to restore */ - int k; /* node to move down */ -{ - int v = s->heap[k]; - int j = k << 1; /* left son of k */ - while (j <= s->heap_len) { - /* Set j to the smallest of the two sons: */ - if (j < s->heap_len && - smaller(tree, s->heap[j+1], s->heap[j], s->depth)) { - j++; - } - /* Exit if v is smaller than both sons */ - if (smaller(tree, v, s->heap[j], s->depth)) break; - - /* Exchange v with the smallest son */ - s->heap[k] = s->heap[j]; k = j; - - /* And continue down the tree, setting j to the left son of k */ - j <<= 1; - } - s->heap[k] = v; -} - -/* =========================================================================== - * Compute the optimal bit lengths for a tree and update the total bit length - * for the current block. - * IN assertion: the fields freq and dad are set, heap[heap_max] and - * above are the tree nodes sorted by increasing frequency. - * OUT assertions: the field len is set to the optimal bit length, the - * array bl_count contains the frequencies for each bit length. - * The length opt_len is updated; static_len is also updated if stree is - * not null. - */ -local void gen_bitlen(s, desc) - deflate_state *s; - tree_desc *desc; /* the tree descriptor */ -{ - ct_data *tree = desc->dyn_tree; - int max_code = desc->max_code; - const ct_data *stree = desc->stat_desc->static_tree; - const intf *extra = desc->stat_desc->extra_bits; - int base = desc->stat_desc->extra_base; - int max_length = desc->stat_desc->max_length; - int h; /* heap index */ - int n, m; /* iterate over the tree elements */ - int bits; /* bit length */ - int xbits; /* extra bits */ - ush f; /* frequency */ - int overflow = 0; /* number of elements with bit length too large */ - - for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; - - /* In a first pass, compute the optimal bit lengths (which may - * overflow in the case of the bit length tree). 
- */ - tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ - - for (h = s->heap_max+1; h < HEAP_SIZE; h++) { - n = s->heap[h]; - bits = tree[tree[n].Dad].Len + 1; - if (bits > max_length) bits = max_length, overflow++; - tree[n].Len = (ush)bits; - /* We overwrite tree[n].Dad which is no longer needed */ - - if (n > max_code) continue; /* not a leaf node */ - - s->bl_count[bits]++; - xbits = 0; - if (n >= base) xbits = extra[n-base]; - f = tree[n].Freq; - s->opt_len += (ulg)f * (unsigned)(bits + xbits); - if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits); - } - if (overflow == 0) return; - - Tracev((stderr,"\nbit length overflow\n")); - /* This happens for example on obj2 and pic of the Calgary corpus */ - - /* Find the first bit length which could increase: */ - do { - bits = max_length-1; - while (s->bl_count[bits] == 0) bits--; - s->bl_count[bits]--; /* move one leaf down the tree */ - s->bl_count[bits+1] += 2; /* move one overflow item as its brother */ - s->bl_count[max_length]--; - /* The brother of the overflow item also moves one step up, - * but this does not affect bl_count[max_length] - */ - overflow -= 2; - } while (overflow > 0); - - /* Now recompute all bit lengths, scanning in increasing frequency. - * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all - * lengths instead of fixing only the wrong ones. This idea is taken - * from 'ar' written by Haruhiko Okumura.) - */ - for (bits = max_length; bits != 0; bits--) { - n = s->bl_count[bits]; - while (n != 0) { - m = s->heap[--h]; - if (m > max_code) continue; - if ((unsigned) tree[m].Len != (unsigned) bits) { - Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); - s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq; - tree[m].Len = (ush)bits; - } - n--; - } - } -} - -/* =========================================================================== - * Generate the codes for a given tree and bit counts (which need not be - * optimal). - * IN assertion: the array bl_count contains the bit length statistics for - * the given tree and the field len is set for all tree elements. - * OUT assertion: the field code is set for all tree elements of non - * zero code length. - */ -local void gen_codes (tree, max_code, bl_count) - ct_data *tree; /* the tree to decorate */ - int max_code; /* largest code with non zero frequency */ - ushf *bl_count; /* number of codes at each bit length */ -{ - ush next_code[MAX_BITS+1]; /* next code value for each bit length */ - unsigned code = 0; /* running code value */ - int bits; /* bit index */ - int n; /* code index */ - - /* The distribution counts are first used to generate the code values - * without bit reversal. - */ - for (bits = 1; bits <= MAX_BITS; bits++) { - code = (code + bl_count[bits-1]) << 1; - next_code[bits] = (ush)code; - } - /* Check that the bit counts in bl_count are consistent. The last code - * must be all ones. - */ - Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1, - "inconsistent bit counts"); - Tracev((stderr,"\ngen_codes: max_code %d ", max_code)); - - for (n = 0; n <= max_code; n++) { - int len = tree[n].Len; - if (len == 0) continue; - /* Now reverse the bits */ - tree[n].Code = (ush)bi_reverse(next_code[len]++, len); - - Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", - n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1)); - } -} - -/* =========================================================================== - * Construct one Huffman tree and assigns the code bit strings and lengths. - * Update the total bit length for the current block. - * IN assertion: the field freq is set for all tree elements. - * OUT assertions: the fields len and code are set to the optimal bit length - * and corresponding code. The length opt_len is updated; static_len is - * also updated if stree is not null. The field max_code is set. - */ -local void build_tree(s, desc) - deflate_state *s; - tree_desc *desc; /* the tree descriptor */ -{ - ct_data *tree = desc->dyn_tree; - const ct_data *stree = desc->stat_desc->static_tree; - int elems = desc->stat_desc->elems; - int n, m; /* iterate over heap elements */ - int max_code = -1; /* largest code with non zero frequency */ - int node; /* new node being created */ - - /* Construct the initial heap, with least frequent element in - * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1]. - * heap[0] is not used.
- */ - s->heap_len = 0, s->heap_max = HEAP_SIZE; - - for (n = 0; n < elems; n++) { - if (tree[n].Freq != 0) { - s->heap[++(s->heap_len)] = max_code = n; - s->depth[n] = 0; - } else { - tree[n].Len = 0; - } - } - - /* The pkzip format requires that at least one distance code exists, - * and that at least one bit should be sent even if there is only one - * possible code. So to avoid special checks later on we force at least - * two codes of non zero frequency. - */ - while (s->heap_len < 2) { - node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); - tree[node].Freq = 1; - s->depth[node] = 0; - s->opt_len--; if (stree) s->static_len -= stree[node].Len; - /* node is 0 or 1 so it does not have extra bits */ - } - desc->max_code = max_code; - - /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree, - * establish sub-heaps of increasing lengths: - */ - for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); - - /* Construct the Huffman tree by repeatedly combining the least two - * frequent nodes. - */ - node = elems; /* next internal node of the tree */ - do { - pqremove(s, tree, n); /* n = node of least frequency */ - m = s->heap[SMALLEST]; /* m = node of next least frequency */ - - s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ - s->heap[--(s->heap_max)] = m; - - /* Create a new node father of n and m */ - tree[node].Freq = tree[n].Freq + tree[m].Freq; - s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? - s->depth[n] : s->depth[m]) + 1); - tree[n].Dad = tree[m].Dad = (ush)node; -#ifdef DUMP_BL_TREE - if (tree == s->bl_tree) { - fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", - node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); - } -#endif - /* and insert the new node in the heap */ - s->heap[SMALLEST] = node++; - pqdownheap(s, tree, SMALLEST); - - } while (s->heap_len >= 2); - - s->heap[--(s->heap_max)] = s->heap[SMALLEST]; - - /* At this point, the fields freq and dad are set. We can now - * generate the bit lengths. - */ - gen_bitlen(s, (tree_desc *)desc); - - /* The field len is now set, we can generate the bit codes */ - gen_codes ((ct_data *)tree, max_code, s->bl_count); -} - -/* =========================================================================== - * Scan a literal or distance tree to determine the frequencies of the codes - * in the bit length tree. 
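The merge loop at the heart of build_tree above can be replayed in miniature; this toy uses a linear scan for the two least-frequent nodes where zlib uses the heap, purely for illustration:

    #include <stdio.h>

    #define N 4

    int main(void)
    {
        int freq[2 * N] = {5, 9, 12, 13};   /* leaves; parents appended */
        int alive[2 * N] = {1, 1, 1, 1};
        int count = N, next = N;

        while (count > 1) {
            int a = -1, b = -1, i;
            for (i = 0; i < next; i++) {     /* find the two smallest */
                if (!alive[i]) continue;
                if (a < 0 || freq[i] < freq[a]) { b = a; a = i; }
                else if (b < 0 || freq[i] < freq[b]) { b = i; }
            }
            freq[next] = freq[a] + freq[b];  /* new internal node */
            alive[next++] = 1;
            alive[a] = alive[b] = 0;
            count--;
            printf("merge %d + %d -> %d\n", freq[a], freq[b], freq[next - 1]);
        }
        return 0;
    }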
- */ -local void scan_tree (s, tree, max_code) - deflate_state *s; - ct_data *tree; /* the tree to be scanned */ - int max_code; /* and its largest code of non zero frequency */ -{ - int n; /* iterates over all tree elements */ - int prevlen = -1; /* last emitted length */ - int curlen; /* length of current code */ - int nextlen = tree[0].Len; /* length of next code */ - int count = 0; /* repeat count of the current code */ - int max_count = 7; /* max repeat count */ - int min_count = 4; /* min repeat count */ - - if (nextlen == 0) max_count = 138, min_count = 3; - tree[max_code+1].Len = (ush)0xffff; /* guard */ - - for (n = 0; n <= max_code; n++) { - curlen = nextlen; nextlen = tree[n+1].Len; - if (++count < max_count && curlen == nextlen) { - continue; - } else if (count < min_count) { - s->bl_tree[curlen].Freq += count; - } else if (curlen != 0) { - if (curlen != prevlen) s->bl_tree[curlen].Freq++; - s->bl_tree[REP_3_6].Freq++; - } else if (count <= 10) { - s->bl_tree[REPZ_3_10].Freq++; - } else { - s->bl_tree[REPZ_11_138].Freq++; - } - count = 0; prevlen = curlen; - if (nextlen == 0) { - max_count = 138, min_count = 3; - } else if (curlen == nextlen) { - max_count = 6, min_count = 3; - } else { - max_count = 7, min_count = 4; - } - } -} - -/* =========================================================================== - * Send a literal or distance tree in compressed form, using the codes in - * bl_tree. - */ -local void send_tree (s, tree, max_code) - deflate_state *s; - ct_data *tree; /* the tree to be scanned */ - int max_code; /* and its largest code of non zero frequency */ -{ - int n; /* iterates over all tree elements */ - int prevlen = -1; /* last emitted length */ - int curlen; /* length of current code */ - int nextlen = tree[0].Len; /* length of next code */ - int count = 0; /* repeat count of the current code */ - int max_count = 7; /* max repeat count */ - int min_count = 4; /* min repeat count */ - - /* tree[max_code+1].Len = -1; */ /* guard already set */ - if (nextlen == 0) max_count = 138, min_count = 3; - - for (n = 0; n <= max_code; n++) { - curlen = nextlen; nextlen = tree[n+1].Len; - if (++count < max_count && curlen == nextlen) { - continue; - } else if (count < min_count) { - do { send_code(s, curlen, s->bl_tree); } while (--count != 0); - - } else if (curlen != 0) { - if (curlen != prevlen) { - send_code(s, curlen, s->bl_tree); count--; - } - Assert(count >= 3 && count <= 6, " 3_6?"); - send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2); - - } else if (count <= 10) { - send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3); - - } else { - send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7); - } - count = 0; prevlen = curlen; - if (nextlen == 0) { - max_count = 138, min_count = 3; - } else if (curlen == nextlen) { - max_count = 6, min_count = 3; - } else { - max_count = 7, min_count = 4; - } - } -} - -/* =========================================================================== - * Construct the Huffman tree for the bit lengths and return the index in - * bl_order of the last bit length code to send. 
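scan_tree and send_tree above apply a small run-length code to the sequence of code lengths. A simplified standalone encoder; it clamps repeat counts rather than reproducing the exact max_count/min_count bookkeeping:

    #include <stdio.h>

    int main(void)
    {
        int len[] = {8, 8, 8, 8, 8, 0, 0, 0, 0, 9};
        int n = sizeof len / sizeof len[0], i = 0;

        while (i < n) {
            int cur = len[i], run = 0;
            while (i + run < n && len[i + run] == cur) run++;
            if (cur == 0 && run >= 3)        /* runs of zero lengths */
                printf(run <= 10 ? "REPZ_3_10(%d)\n" : "REPZ_11_138(%d)\n", run);
            else if (run >= 4)               /* literal + repeat of previous */
                printf("len %d, REP_3_6(%d)\n", cur, run - 1 > 6 ? 6 : run - 1);
            else {
                int k;
                for (k = 0; k < run; k++) printf("len %d\n", cur);
            }
            i += run;
        }
        return 0;
    }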
- */ -local int build_bl_tree(s) - deflate_state *s; -{ - int max_blindex; /* index of last bit length code of non zero freq */ - - /* Determine the bit length frequencies for literal and distance trees */ - scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); - scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); - - /* Build the bit length tree: */ - build_tree(s, (tree_desc *)(&(s->bl_desc))); - /* opt_len now includes the length of the tree representations, except - * the lengths of the bit lengths codes and the 5+5+4 bits for the counts. - */ - - /* Determine the number of bit length codes to send. The pkzip format - * requires that at least 4 bit length codes be sent. (appnote.txt says - * 3 but the actual value used is 4.) - */ - for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { - if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; - } - /* Update opt_len to include the bit length tree and counts */ - s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4; - Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", - s->opt_len, s->static_len)); - - return max_blindex; -} - -/* =========================================================================== - * Send the header for a block using dynamic Huffman trees: the counts, the - * lengths of the bit length codes, the literal tree and the distance tree. - * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. - */ -local void send_all_trees(s, lcodes, dcodes, blcodes) - deflate_state *s; - int lcodes, dcodes, blcodes; /* number of codes for each tree */ -{ - int rank; /* index in bl_order */ - - Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); - Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, - "too many codes"); - Tracev((stderr, "\nbl counts: ")); - send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */ - send_bits(s, dcodes-1, 5); - send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */ - for (rank = 0; rank < blcodes; rank++) { - Tracev((stderr, "\nbl code %2d ", bl_order[rank])); - send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); - } - Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); - - send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */ - Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); - - send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ - Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); -} - -/* =========================================================================== - * Send a stored block - */ -void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last) - deflate_state *s; - charf *buf; /* input block */ - ulg stored_len; /* length of input block */ - int last; /* one if this is the last block for a file */ -{ - send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */ - bi_windup(s); /* align on byte boundary */ - put_short(s, (ush)stored_len); - put_short(s, (ush)~stored_len); - zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); - s->pending += stored_len; -#ifdef ZLIB_DEBUG - s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; - s->compressed_len += (stored_len + 4) << 3; - s->bits_sent += 2*16; - s->bits_sent += stored_len<<3; -#endif -} - -/* =========================================================================== - * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) - */ -void ZLIB_INTERNAL _tr_flush_bits(s) - deflate_state *s; -{ - bi_flush(s); -} - -/* 
=========================================================================== - * Send one empty static block to give enough lookahead for inflate. - * This takes 10 bits, of which 7 may remain in the bit buffer. - */ -void ZLIB_INTERNAL _tr_align(s) - deflate_state *s; -{ - send_bits(s, STATIC_TREES<<1, 3); - send_code(s, END_BLOCK, static_ltree); -#ifdef ZLIB_DEBUG - s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ -#endif - bi_flush(s); -} - -/* =========================================================================== - * Determine the best encoding for the current block: dynamic trees, static - * trees or store, and write out the encoded block. - */ -void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last) - deflate_state *s; - charf *buf; /* input block, or NULL if too old */ - ulg stored_len; /* length of input block */ - int last; /* one if this is the last block for a file */ -{ - ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ - int max_blindex = 0; /* index of last bit length code of non zero freq */ - - /* Build the Huffman trees unless a stored block is forced */ - if (s->level > 0) { - - /* Check if the file is binary or text */ - if (s->strm->data_type == Z_UNKNOWN) - s->strm->data_type = detect_data_type(s); - - /* Construct the literal and distance trees */ - build_tree(s, (tree_desc *)(&(s->l_desc))); - Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, - s->static_len)); - - build_tree(s, (tree_desc *)(&(s->d_desc))); - Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, - s->static_len)); - /* At this point, opt_len and static_len are the total bit lengths of - * the compressed block data, excluding the tree representations. - */ - - /* Build the bit length tree for the above two trees, and get the index - * in bl_order of the last bit length code to send. - */ - max_blindex = build_bl_tree(s); - - /* Determine the best encoding. Compute the block lengths in bytes. */ - opt_lenb = (s->opt_len+3+7)>>3; - static_lenb = (s->static_len+3+7)>>3; - - Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", - opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, - s->last_lit)); - - if (static_lenb <= opt_lenb) opt_lenb = static_lenb; - - } else { - Assert(buf != (char*)0, "lost buf"); - opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ - } - -#ifdef FORCE_STORED - if (buf != (char*)0) { /* force stored block */ -#else - if (stored_len+4 <= opt_lenb && buf != (char*)0) { - /* 4: two words for the lengths */ -#endif - /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. - * Otherwise we can't have processed more than WSIZE input bytes since - * the last block flush, because compression would have been - * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to - * transform a block into a stored block. 
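Reduced to its cost comparison, the choice _tr_flush_block is making here looks like this (sizes in bytes as computed above; the Z_FIXED strategy override is omitted, and choose_block_type is an illustrative name):

    static const char * choose_block_type(unsigned long opt_lenb,
                                          unsigned long static_lenb,
                                          unsigned long stored_len,
                                          int buf_available)
    {
        if (static_lenb <= opt_lenb) opt_lenb = static_lenb; /* cheaper tree */
        if (stored_len + 4 <= opt_lenb && buf_available)
            return "stored";        /* 4 = the two 16-bit length words */
        if (static_lenb == opt_lenb)
            return "static trees";
        return "dynamic trees";
    }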
- */ - _tr_stored_block(s, buf, stored_len, last); - -#ifdef FORCE_STATIC - } else if (static_lenb >= 0) { /* force static trees */ -#else - } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { -#endif - send_bits(s, (STATIC_TREES<<1)+last, 3); - compress_block(s, (const ct_data *)static_ltree, - (const ct_data *)static_dtree); -#ifdef ZLIB_DEBUG - s->compressed_len += 3 + s->static_len; -#endif - } else { - send_bits(s, (DYN_TREES<<1)+last, 3); - send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, - max_blindex+1); - compress_block(s, (const ct_data *)s->dyn_ltree, - (const ct_data *)s->dyn_dtree); -#ifdef ZLIB_DEBUG - s->compressed_len += 3 + s->opt_len; -#endif - } - Assert (s->compressed_len == s->bits_sent, "bad compressed size"); - /* The above check is made mod 2^32, for files larger than 512 MB - * and uLong implemented on 32 bits. - */ - init_block(s); - - if (last) { - bi_windup(s); -#ifdef ZLIB_DEBUG - s->compressed_len += 7; /* align on byte boundary */ -#endif - } - Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, - s->compressed_len-7*last)); -} - -/* =========================================================================== - * Save the match info and tally the frequency counts. Return true if - * the current block must be flushed. - */ -int ZLIB_INTERNAL _tr_tally (s, dist, lc) - deflate_state *s; - unsigned dist; /* distance of matched string */ - unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */ -{ - s->d_buf[s->last_lit] = (ush)dist; - s->l_buf[s->last_lit++] = (uch)lc; - if (dist == 0) { - /* lc is the unmatched char */ - s->dyn_ltree[lc].Freq++; - } else { - s->matches++; - /* Here, lc is the match length - MIN_MATCH */ - dist--; /* dist = match distance - 1 */ - Assert((ush)dist < (ush)MAX_DIST(s) && - (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && - (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); - - s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; - s->dyn_dtree[d_code(dist)].Freq++; - } - -#ifdef TRUNCATE_BLOCK - /* Try to guess if it is profitable to stop the current block here */ - if ((s->last_lit & 0x1fff) == 0 && s->level > 2) { - /* Compute an upper bound for the compressed length */ - ulg out_length = (ulg)s->last_lit*8L; - ulg in_length = (ulg)((long)s->strstart - s->block_start); - int dcode; - for (dcode = 0; dcode < D_CODES; dcode++) { - out_length += (ulg)s->dyn_dtree[dcode].Freq * - (5L+extra_dbits[dcode]); - } - out_length >>= 3; - Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ", - s->last_lit, in_length, out_length, - 100L - out_length*100L/in_length)); - if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1; - } -#endif - return (s->last_lit == s->lit_bufsize-1); - /* We avoid equality with lit_bufsize because of wraparound at 64K - * on 16 bit machines and because stored blocks are restricted to - * 64K-1 bytes. 
- */ -} - -/* =========================================================================== - * Send the block data compressed using the given Huffman trees - */ -local void compress_block(s, ltree, dtree) - deflate_state *s; - const ct_data *ltree; /* literal tree */ - const ct_data *dtree; /* distance tree */ -{ - unsigned dist; /* distance of matched string */ - int lc; /* match length or unmatched char (if dist == 0) */ - unsigned lx = 0; /* running index in l_buf */ - unsigned code; /* the code to send */ - int extra; /* number of extra bits to send */ - - if (s->last_lit != 0) do { - dist = s->d_buf[lx]; - lc = s->l_buf[lx++]; - if (dist == 0) { - send_code(s, lc, ltree); /* send a literal byte */ - Tracecv(isgraph(lc), (stderr," '%c' ", lc)); - } else { - /* Here, lc is the match length - MIN_MATCH */ - code = _length_code[lc]; - send_code(s, code+LITERALS+1, ltree); /* send the length code */ - extra = extra_lbits[code]; - if (extra != 0) { - lc -= base_length[code]; - send_bits(s, lc, extra); /* send the extra length bits */ - } - dist--; /* dist is now the match distance - 1 */ - code = d_code(dist); - Assert (code < D_CODES, "bad d_code"); - - send_code(s, code, dtree); /* send the distance code */ - extra = extra_dbits[code]; - if (extra != 0) { - dist -= (unsigned)base_dist[code]; - send_bits(s, dist, extra); /* send the extra distance bits */ - } - } /* literal or match pair ? */ - - /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */ - Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx, - "pendingBuf overflow"); - - } while (lx < s->last_lit); - - send_code(s, END_BLOCK, ltree); -} - -/* =========================================================================== - * Check if the data type is TEXT or BINARY, using the following algorithm: - * - TEXT if the two conditions below are satisfied: - * a) There are no non-portable control characters belonging to the - * "black list" (0..6, 14..25, 28..31). - * b) There is at least one printable character belonging to the - * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). - * - BINARY otherwise. - * - The following partially-portable control characters form a - * "gray list" that is ignored in this detection algorithm: - * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). - * IN assertion: the fields Freq of dyn_ltree are set. - */ -local int detect_data_type(s) - deflate_state *s; -{ - /* black_mask is the bit mask of black-listed bytes - * set bits 0..6, 14..25, and 28..31 - * 0xf3ffc07f = binary 11110011111111111100000001111111 - */ - unsigned long black_mask = 0xf3ffc07fUL; - int n; - - /* Check for non-textual ("black-listed") bytes. */ - for (n = 0; n <= 31; n++, black_mask >>= 1) - if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) - return Z_BINARY; - - /* Check for textual ("white-listed") bytes. */ - if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 - || s->dyn_ltree[13].Freq != 0) - return Z_TEXT; - for (n = 32; n < LITERALS; n++) - if (s->dyn_ltree[n].Freq != 0) - return Z_TEXT; - - /* There are no "black-listed" or "white-listed" bytes: - * this stream either is empty or has tolerated ("gray-listed") bytes only. 
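The black_mask constant above can be rebuilt bit by bit from the ranges in the comment, which confirms the value (standalone demo):

    #include <stdio.h>

    int main(void)
    {
        unsigned long mask = 0;
        int n;
        for (n = 0; n <= 6; n++)   mask |= 1UL << n;  /* 0..6   */
        for (n = 14; n <= 25; n++) mask |= 1UL << n;  /* 14..25 */
        for (n = 28; n <= 31; n++) mask |= 1UL << n;  /* 28..31 */
        printf("0x%08lx\n", mask);  /* 0xf3ffc07f, matching the constant */
        return 0;
    }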
- */ - return Z_BINARY; -} - -/* =========================================================================== - * Reverse the first len bits of a code, using straightforward code (a faster - * method would use a table) - * IN assertion: 1 <= len <= 15 - */ -local unsigned bi_reverse(code, len) - unsigned code; /* the value to invert */ - int len; /* its bit length */ -{ - register unsigned res = 0; - do { - res |= code & 1; - code >>= 1, res <<= 1; - } while (--len > 0); - return res >> 1; -} - -/* =========================================================================== - * Flush the bit buffer, keeping at most 7 bits in it. - */ -local void bi_flush(s) - deflate_state *s; -{ - if (s->bi_valid == 16) { - put_short(s, s->bi_buf); - s->bi_buf = 0; - s->bi_valid = 0; - } else if (s->bi_valid >= 8) { - put_byte(s, (Byte)s->bi_buf); - s->bi_buf >>= 8; - s->bi_valid -= 8; - } -} - -/* =========================================================================== - * Flush the bit buffer and align the output on a byte boundary - */ -local void bi_windup(s) - deflate_state *s; -{ - if (s->bi_valid > 8) { - put_short(s, s->bi_buf); - } else if (s->bi_valid > 0) { - put_byte(s, (Byte)s->bi_buf); - } - s->bi_buf = 0; - s->bi_valid = 0; -#ifdef ZLIB_DEBUG - s->bits_sent = (s->bits_sent+7) & ~7; -#endif -} diff --git a/base/poco/Foundation/src/trees.h b/base/poco/Foundation/src/trees.h deleted file mode 100644 index 40ea61f35a6..00000000000 --- a/base/poco/Foundation/src/trees.h +++ /dev/null @@ -1,74 +0,0 @@ -/* header created automatically with -DGEN_TREES_H */ - -local const ct_data static_ltree[L_CODES + 2] - = {{{12}, {8}}, {{140}, {8}}, {{76}, {8}}, {{204}, {8}}, {{44}, {8}}, {{172}, {8}}, {{108}, {8}}, {{236}, {8}}, {{28}, {8}}, - {{156}, {8}}, {{92}, {8}}, {{220}, {8}}, {{60}, {8}}, {{188}, {8}}, {{124}, {8}}, {{252}, {8}}, {{2}, {8}}, {{130}, {8}}, - {{66}, {8}}, {{194}, {8}}, {{34}, {8}}, {{162}, {8}}, {{98}, {8}}, {{226}, {8}}, {{18}, {8}}, {{146}, {8}}, {{82}, {8}}, - {{210}, {8}}, {{50}, {8}}, {{178}, {8}}, {{114}, {8}}, {{242}, {8}}, {{10}, {8}}, {{138}, {8}}, {{74}, {8}}, {{202}, {8}}, - {{42}, {8}}, {{170}, {8}}, {{106}, {8}}, {{234}, {8}}, {{26}, {8}}, {{154}, {8}}, {{90}, {8}}, {{218}, {8}}, {{58}, {8}}, - {{186}, {8}}, {{122}, {8}}, {{250}, {8}}, {{6}, {8}}, {{134}, {8}}, {{70}, {8}}, {{198}, {8}}, {{38}, {8}}, {{166}, {8}}, - {{102}, {8}}, {{230}, {8}}, {{22}, {8}}, {{150}, {8}}, {{86}, {8}}, {{214}, {8}}, {{54}, {8}}, {{182}, {8}}, {{118}, {8}}, - {{246}, {8}}, {{14}, {8}}, {{142}, {8}}, {{78}, {8}}, {{206}, {8}}, {{46}, {8}}, {{174}, {8}}, {{110}, {8}}, {{238}, {8}}, - {{30}, {8}}, {{158}, {8}}, {{94}, {8}}, {{222}, {8}}, {{62}, {8}}, {{190}, {8}}, {{126}, {8}}, {{254}, {8}}, {{1}, {8}}, - {{129}, {8}}, {{65}, {8}}, {{193}, {8}}, {{33}, {8}}, {{161}, {8}}, {{97}, {8}}, {{225}, {8}}, {{17}, {8}}, {{145}, {8}}, - {{81}, {8}}, {{209}, {8}}, {{49}, {8}}, {{177}, {8}}, {{113}, {8}}, {{241}, {8}}, {{9}, {8}}, {{137}, {8}}, {{73}, {8}}, - {{201}, {8}}, {{41}, {8}}, {{169}, {8}}, {{105}, {8}}, {{233}, {8}}, {{25}, {8}}, {{153}, {8}}, {{89}, {8}}, {{217}, {8}}, - {{57}, {8}}, {{185}, {8}}, {{121}, {8}}, {{249}, {8}}, {{5}, {8}}, {{133}, {8}}, {{69}, {8}}, {{197}, {8}}, {{37}, {8}}, - {{165}, {8}}, {{101}, {8}}, {{229}, {8}}, {{21}, {8}}, {{149}, {8}}, {{85}, {8}}, {{213}, {8}}, {{53}, {8}}, {{181}, {8}}, - {{117}, {8}}, {{245}, {8}}, {{13}, {8}}, {{141}, {8}}, {{77}, {8}}, {{205}, {8}}, {{45}, {8}}, {{173}, {8}}, {{109}, {8}}, - {{237}, {8}}, {{29}, {8}}, {{157}, {8}}, {{93}, {8}}, {{221}, 
{8}}, {{61}, {8}}, {{189}, {8}}, {{125}, {8}}, {{253}, {8}}, - {{19}, {9}}, {{275}, {9}}, {{147}, {9}}, {{403}, {9}}, {{83}, {9}}, {{339}, {9}}, {{211}, {9}}, {{467}, {9}}, {{51}, {9}}, - {{307}, {9}}, {{179}, {9}}, {{435}, {9}}, {{115}, {9}}, {{371}, {9}}, {{243}, {9}}, {{499}, {9}}, {{11}, {9}}, {{267}, {9}}, - {{139}, {9}}, {{395}, {9}}, {{75}, {9}}, {{331}, {9}}, {{203}, {9}}, {{459}, {9}}, {{43}, {9}}, {{299}, {9}}, {{171}, {9}}, - {{427}, {9}}, {{107}, {9}}, {{363}, {9}}, {{235}, {9}}, {{491}, {9}}, {{27}, {9}}, {{283}, {9}}, {{155}, {9}}, {{411}, {9}}, - {{91}, {9}}, {{347}, {9}}, {{219}, {9}}, {{475}, {9}}, {{59}, {9}}, {{315}, {9}}, {{187}, {9}}, {{443}, {9}}, {{123}, {9}}, - {{379}, {9}}, {{251}, {9}}, {{507}, {9}}, {{7}, {9}}, {{263}, {9}}, {{135}, {9}}, {{391}, {9}}, {{71}, {9}}, {{327}, {9}}, - {{199}, {9}}, {{455}, {9}}, {{39}, {9}}, {{295}, {9}}, {{167}, {9}}, {{423}, {9}}, {{103}, {9}}, {{359}, {9}}, {{231}, {9}}, - {{487}, {9}}, {{23}, {9}}, {{279}, {9}}, {{151}, {9}}, {{407}, {9}}, {{87}, {9}}, {{343}, {9}}, {{215}, {9}}, {{471}, {9}}, - {{55}, {9}}, {{311}, {9}}, {{183}, {9}}, {{439}, {9}}, {{119}, {9}}, {{375}, {9}}, {{247}, {9}}, {{503}, {9}}, {{15}, {9}}, - {{271}, {9}}, {{143}, {9}}, {{399}, {9}}, {{79}, {9}}, {{335}, {9}}, {{207}, {9}}, {{463}, {9}}, {{47}, {9}}, {{303}, {9}}, - {{175}, {9}}, {{431}, {9}}, {{111}, {9}}, {{367}, {9}}, {{239}, {9}}, {{495}, {9}}, {{31}, {9}}, {{287}, {9}}, {{159}, {9}}, - {{415}, {9}}, {{95}, {9}}, {{351}, {9}}, {{223}, {9}}, {{479}, {9}}, {{63}, {9}}, {{319}, {9}}, {{191}, {9}}, {{447}, {9}}, - {{127}, {9}}, {{383}, {9}}, {{255}, {9}}, {{511}, {9}}, {{0}, {7}}, {{64}, {7}}, {{32}, {7}}, {{96}, {7}}, {{16}, {7}}, - {{80}, {7}}, {{48}, {7}}, {{112}, {7}}, {{8}, {7}}, {{72}, {7}}, {{40}, {7}}, {{104}, {7}}, {{24}, {7}}, {{88}, {7}}, - {{56}, {7}}, {{120}, {7}}, {{4}, {7}}, {{68}, {7}}, {{36}, {7}}, {{100}, {7}}, {{20}, {7}}, {{84}, {7}}, {{52}, {7}}, - {{116}, {7}}, {{3}, {8}}, {{131}, {8}}, {{67}, {8}}, {{195}, {8}}, {{35}, {8}}, {{163}, {8}}, {{99}, {8}}, {{227}, {8}}}; - -local const ct_data static_dtree[D_CODES] - = {{{0}, {5}}, {{16}, {5}}, {{8}, {5}}, {{24}, {5}}, {{4}, {5}}, {{20}, {5}}, {{12}, {5}}, {{28}, {5}}, {{2}, {5}}, {{18}, {5}}, - {{10}, {5}}, {{26}, {5}}, {{6}, {5}}, {{22}, {5}}, {{14}, {5}}, {{30}, {5}}, {{1}, {5}}, {{17}, {5}}, {{9}, {5}}, {{25}, {5}}, - {{5}, {5}}, {{21}, {5}}, {{13}, {5}}, {{29}, {5}}, {{3}, {5}}, {{19}, {5}}, {{11}, {5}}, {{27}, {5}}, {{7}, {5}}, {{23}, {5}}}; - -const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] - = {0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 
15, 15, - 0, 0, 16, 17, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, - 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29}; - -const uch ZLIB_INTERNAL _length_code[MAX_MATCH - MIN_MATCH + 1] - = {0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, - 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28}; - -local const int base_length[LENGTH_CODES] - = {0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 0}; - -local const int base_dist[D_CODES] = {0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, - 192, 256, 384, 512, 768, 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576}; diff --git a/base/poco/Foundation/src/utils.h b/base/poco/Foundation/src/utils.h deleted file mode 100644 index bdfdaed9833..00000000000 --- a/base/poco/Foundation/src/utils.h +++ /dev/null @@ -1,311 +0,0 @@ -// Copyright 2010 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef DOUBLE_CONVERSION_UTILS_H_ -#define DOUBLE_CONVERSION_UTILS_H_ - -#include <stdlib.h> -#include <string.h> - -#include <assert.h> -#ifndef ASSERT -# define ASSERT(condition) assert(condition); -#endif -#ifndef UNIMPLEMENTED -# define UNIMPLEMENTED() (abort()) -#endif -#ifndef UNREACHABLE -# define UNREACHABLE() (abort()) -#endif - -// Double operations detection based on target architecture. -// Linux uses a 80bit wide floating point stack on x86. This induces double -// rounding, which in turn leads to wrong results. -// An easy way to test if the floating-point operations are correct is to -// evaluate: 89255.0/1e22. If the floating-point stack is 64 bits wide then -// the result is equal to 89255e-22. -// The best way to test this, is to create a division-function and to compare -// the output of the division with the expected result. (Inlining must be -// disabled.) -// On Linux,x86 89255e-22 != Div_double(89255.0/1e22) -#if defined(_M_X64) || defined(__x86_64__) || defined(__ARMEL__) || defined(_M_ARM) || defined(__arm__) || defined(__arm64__) \ - || defined(__avr32__) || defined(__hppa__) || defined(__ia64__) || defined(__mips__) || defined(__powerpc__) || defined(__ppc__) \ - || defined(__ppc64__) || defined(__sparc__) || defined(__sparc) || defined(__s390__) || defined(__SH4__) || defined(__alpha__) \ - || defined(_MIPS_ARCH_MIPS32R2) || defined(__riscv) || defined(__AARCH64EL__) || defined(nios2) || defined(__nios2) \ - || defined(__nios2__) -# define DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS 1 -#elif defined(_M_IX86) || defined(__i386__) || defined(__i386) -# undef DOUBLE_CONVERSION_CORRECT_DOUBLE_OPERATIONS -#else -# error Target architecture was not detected as supported by Double-Conversion. -#endif - -#if defined(__GNUC__) -# define DOUBLE_CONVERSION_UNUSED __attribute__((unused)) -#else -# define DOUBLE_CONVERSION_UNUSED -#endif - - -# include <stdint.h> - - -// The following macro works on both 32 and 64-bit platforms. -// Usage: instead of writing 0x1234567890123456 -// write UINT64_2PART_C(0x12345678,90123456); -#define UINT64_2PART_C(a, b) (((static_cast<uint64_t>(a) << 32) + 0x##b##u)) - - -// The expression ARRAY_SIZE(a) is a compile-time constant of type -// size_t which represents the number of elements of the given -// array. You should only use ARRAY_SIZE on statically allocated -// arrays.
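// For illustration, a minimal sketch of the guard inside the macro defined just
// below (the array is a hypothetical example, not from the original source): for
// a real array, sizeof(a) % sizeof(*(a)) is 0, the divisor collapses to 1, and
// the expression is simply the element count; when the element size does not
// evenly divide sizeof(a), as happens for some pointer arguments, the divisor
// becomes 0 and the compile-time division by zero rejects the code.
//
//   static const int kPowersOfTen[] = {1, 10, 100, 1000};
//   typedef char kHasFourElements[ARRAY_SIZE(kPowersOfTen) == 4 ? 1 : -1];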
-#ifndef ARRAY_SIZE -# define ARRAY_SIZE(a) ((sizeof(a) / sizeof(*(a))) / static_cast<size_t>(!(sizeof(a) % sizeof(*(a))))) -#endif - -// A macro to disallow the evil copy constructor and operator= functions -// This should be used in the private: declarations for a class -#ifndef DISALLOW_COPY_AND_ASSIGN -# define DISALLOW_COPY_AND_ASSIGN(TypeName) \ - TypeName(const TypeName &); \ - void operator=(const TypeName &) -#endif - -// A macro to disallow all the implicit constructors, namely the -// default constructor, copy constructor and operator= functions. -// -// This should be used in the private: declarations for a class -// that wants to prevent anyone from instantiating it. This is -// especially useful for classes containing only static methods. -#ifndef DISALLOW_IMPLICIT_CONSTRUCTORS -# define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \ - TypeName(); \ - DISALLOW_COPY_AND_ASSIGN(TypeName) -#endif - -namespace double_conversion -{ - -static const int kCharSize = sizeof(char); - -// Returns the maximum of the two parameters. -template <typename T> -static T Max(T a, T b) -{ - return a < b ? b : a; -} - - -// Returns the minimum of the two parameters. -template <typename T> -static T Min(T a, T b) -{ - return a < b ? a : b; -} - - -inline int StrLength(const char * string) -{ - size_t length = strlen(string); - ASSERT(length == static_cast<size_t>(static_cast<int>(length))); - return static_cast<int>(length); -} - -// This is a simplified version of V8's Vector class. -template <typename T> -class Vector -{ -public: - Vector() : start_(NULL), length_(0) { } - Vector(T * data, int length) : start_(data), length_(length) { ASSERT(length == 0 || (length > 0 && data != NULL)); } - - // Returns a vector using the same backing storage as this one, - // spanning from and including 'from', to but not including 'to'. - Vector<T> SubVector(int from, int to) - { - ASSERT(to <= length_); - ASSERT(from < to); - ASSERT(0 <= from); - return Vector<T>(start() + from, to - from); - } - - // Returns the length of the vector. - int length() const { return length_; } - - // Returns whether or not the vector is empty. - bool is_empty() const { return length_ == 0; } - - // Returns the pointer to the start of the data in the vector. - T * start() const { return start_; } - - // Access individual vector elements - checks bounds in debug mode. - T & operator[](int index) const - { - ASSERT(0 <= index && index < length_); - return start_[index]; - } - - T & first() { return start_[0]; } - - T & last() { return start_[length_ - 1]; } - -private: - T * start_; - int length_; -}; - - -// Helper class for building result strings in a character buffer. The -// purpose of the class is to use safe operations that checks the -// buffer bounds on all operations in debug mode. -class StringBuilder -{ -public: - StringBuilder(char * buffer, int size) : buffer_(buffer, size), position_(0) { } - - ~StringBuilder() - { - if (!is_finalized()) - Finalize(); - } - - int size() const { return buffer_.length(); } - - // Get the current position in the builder. - int position() const - { - ASSERT(!is_finalized()); - return position_; - } - - // Reset the position. - void Reset() { position_ = 0; } - - // Add a single character to the builder. It is not allowed to add - // 0-characters; use the Finalize() method to terminate the string - // instead. - void AddCharacter(char c) - { - ASSERT(c != '\0'); - ASSERT(!is_finalized() && position_ < buffer_.length()); - buffer_[position_++] = c; - } - - // Add an entire string to the builder.
Uses strlen() internally to - // compute the length of the input string. - void AddString(const char * s) { AddSubstring(s, StrLength(s)); } - - // Add the first 'n' characters of the given string 's' to the - // builder. The input string must have enough characters. - void AddSubstring(const char * s, int n) - { - ASSERT(!is_finalized() && position_ + n < buffer_.length()); - ASSERT(static_cast<size_t>(n) <= strlen(s)); - memmove(&buffer_[position_], s, n * kCharSize); - position_ += n; - } - - - // Add character padding to the builder. If count is non-positive, - // nothing is added to the builder. - void AddPadding(char c, int count) - { - for (int i = 0; i < count; i++) - { - AddCharacter(c); - } - } - - // Finalize the string by 0-terminating it and returning the buffer. - char * Finalize() - { - ASSERT(!is_finalized() && position_ < buffer_.length()); - buffer_[position_] = '\0'; - // Make sure nobody managed to add a 0-character to the - // buffer while building the string. - ASSERT(strlen(buffer_.start()) == static_cast<size_t>(position_)); - position_ = -1; - ASSERT(is_finalized()); - return buffer_.start(); - } - -private: - Vector<char> buffer_; - int position_; - - bool is_finalized() const { return position_ < 0; } - - DISALLOW_IMPLICIT_CONSTRUCTORS(StringBuilder); -}; - -// The type-based aliasing rule allows the compiler to assume that pointers of -// different types (for some definition of different) never alias each other. -// Thus the following code does not work: -// -// float f = foo(); -// int fbits = *(int*)(&f); -// -// The compiler 'knows' that the int pointer can't refer to f since the types -// don't match, so the compiler may cache f in a register, leaving random data -// in fbits. Using C++ style casts makes no difference, however a pointer to -// char data is assumed to alias any other pointer. This is the 'memcpy -// exception'. -// -// Bit_cast uses the memcpy exception to move the bits from a variable of one -// type of a variable of another type. Of course the end result is likely to -// be implementation dependent. Most compilers (gcc-4.2 and MSVC 2005) -// will completely optimize BitCast away. -// -// There is an additional use for BitCast. -// Recent gccs will warn when they see casts that may result in breakage due to -// the type-based aliasing rule. If you have checked that there is no breakage -// you can use BitCast to cast one pointer type to another. This confuses gcc -// enough that it can no longer see that you have cast one pointer type to -// another thus avoiding the warning. -template <class Dest, class Source> -inline Dest BitCast(const Source & source) -{ - // Compile time assertion: sizeof(Dest) == sizeof(Source) - // A compile error here means your Dest and Source have different sizes. - DOUBLE_CONVERSION_UNUSED - typedef char VerifySizesAreEqual[sizeof(Dest) == sizeof(Source) ? 1 : -1]; - - Dest dest; - memmove(&dest, &source, sizeof(dest)); - return dest; -} - -template <class Dest, class Source> -inline Dest BitCast(Source * source) -{ - return BitCast<Dest>(reinterpret_cast<uintptr_t>(source)); -} - -} // namespace double_conversion - -#endif // DOUBLE_CONVERSION_UTILS_H_ diff --git a/base/poco/Foundation/src/zutil.c b/base/poco/Foundation/src/zutil.c deleted file mode index 14a7bebb1b3..00000000000 --- a/base/poco/Foundation/src/zutil.c +++ /dev/null @@ -1,324 +0,0 @@ -/* zutil.c -- target dependent utility functions for the compression library - * Copyright (C) 1995-2005, 2010, 2011, 2012 Jean-loup Gailly.
- * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* @(#) $Id: //poco/1.4/Foundation/src/zutil.c#3 $ */ - -#include "zutil.h" -#ifndef Z_SOLO -# include "gzguts.h" -#endif - -#ifndef NO_DUMMY_DECL -struct internal_state {int dummy;}; /* for buggy compilers */ -#endif - -z_const char * const z_errmsg[10] = { -"need dictionary", /* Z_NEED_DICT 2 */ -"stream end", /* Z_STREAM_END 1 */ -"", /* Z_OK 0 */ -"file error", /* Z_ERRNO (-1) */ -"stream error", /* Z_STREAM_ERROR (-2) */ -"data error", /* Z_DATA_ERROR (-3) */ -"insufficient memory", /* Z_MEM_ERROR (-4) */ -"buffer error", /* Z_BUF_ERROR (-5) */ -"incompatible version",/* Z_VERSION_ERROR (-6) */ -""}; - - -const char * ZEXPORT zlibVersion() -{ - return ZLIB_VERSION; -} - -uLong ZEXPORT zlibCompileFlags() -{ - uLong flags; - - flags = 0; - switch ((int)(sizeof(uInt))) { - case 2: break; - case 4: flags += 1; break; - case 8: flags += 2; break; - default: flags += 3; - } - switch ((int)(sizeof(uLong))) { - case 2: break; - case 4: flags += 1 << 2; break; - case 8: flags += 2 << 2; break; - default: flags += 3 << 2; - } - switch ((int)(sizeof(voidpf))) { - case 2: break; - case 4: flags += 1 << 4; break; - case 8: flags += 2 << 4; break; - default: flags += 3 << 4; - } - switch ((int)(sizeof(z_off_t))) { - case 2: break; - case 4: flags += 1 << 6; break; - case 8: flags += 2 << 6; break; - default: flags += 3 << 6; - } -#ifdef ZLIB_DEBUG - flags += 1 << 8; -#endif -#if defined(ASMV) || defined(ASMINF) - flags += 1 << 9; -#endif -#ifdef ZLIB_WINAPI - flags += 1 << 10; -#endif -#ifdef BUILDFIXED - flags += 1 << 12; -#endif -#ifdef DYNAMIC_CRC_TABLE - flags += 1 << 13; -#endif -#ifdef NO_GZCOMPRESS - flags += 1L << 16; -#endif -#ifdef NO_GZIP - flags += 1L << 17; -#endif -#ifdef PKZIP_BUG_WORKAROUND - flags += 1L << 20; -#endif -#ifdef FASTEST - flags += 1L << 21; -#endif -#if defined(STDC) || defined(Z_HAVE_STDARG_H) -# ifdef NO_vsnprintf - flags += 1L << 25; -# ifdef HAS_vsprintf_void - flags += 1L << 26; -# endif -# else -# ifdef HAS_vsnprintf_void - flags += 1L << 26; -# endif -# endif -#else - flags += 1L << 24; -# ifdef NO_snprintf - flags += 1L << 25; -# ifdef HAS_sprintf_void - flags += 1L << 26; -# endif -# else -# ifdef HAS_snprintf_void - flags += 1L << 26; -# endif -# endif -#endif - return flags; -} - -#ifdef ZLIB_DEBUG - -# ifndef verbose -# define verbose 0 -# endif -int ZLIB_INTERNAL z_verbose = verbose; - -void ZLIB_INTERNAL z_error (m) - char *m; -{ - fprintf(stderr, "%s\n", m); - exit(1); -} -#endif - -/* exported to allow conversion of error code to string for compress() and - * uncompress() - */ -const char * ZEXPORT zError(err) - int err; -{ - return ERR_MSG(err); -} - -#if defined(_WIN32_WCE) && _WIN32_WCE < 0x800 - /* The Microsoft C Run-Time Library for Windows CE doesn't have - * errno. We define it as a global variable to simplify porting. - * Its value is always 0 and should not be used. - */ - int errno = 0; -#endif - -#ifndef HAVE_MEMCPY - -void ZLIB_INTERNAL zmemcpy(dest, source, len) - Bytef* dest; - const Bytef* source; - uInt len; -{ - if (len == 0) return; - do { - *dest++ = *source++; /* ??? to be unrolled */ - } while (--len != 0); -} - -int ZLIB_INTERNAL zmemcmp(s1, s2, len) - const Bytef* s1; - const Bytef* s2; - uInt len; -{ - uInt j; - - for (j = 0; j < len; j++) { - if (s1[j] != s2[j]) return 2*(s1[j] > s2[j])-1; - } - return 0; -} - -void ZLIB_INTERNAL zmemzero(dest, len) - Bytef* dest; - uInt len; -{ - if (len == 0) return; - do { - *dest++ = 0; /* ??? 
to be unrolled */ - } while (--len != 0); -} -#endif - -#ifndef Z_SOLO - -#ifdef SYS16BIT - -#ifdef __TURBOC__ -/* Turbo C in 16-bit mode */ - -# define MY_ZCALLOC - -/* Turbo C malloc() does not allow dynamic allocation of 64K bytes - * and farmalloc(64K) returns a pointer with an offset of 8, so we - * must fix the pointer. Warning: the pointer must be put back to its - * original form in order to free it, use zcfree(). - */ - -#define MAX_PTR 10 -/* 10*64K = 640K */ - -local int next_ptr = 0; - -typedef struct ptr_table_s { - voidpf org_ptr; - voidpf new_ptr; -} ptr_table; - -local ptr_table table[MAX_PTR]; -/* This table is used to remember the original form of pointers - * to large buffers (64K). Such pointers are normalized with a zero offset. - * Since MS-DOS is not a preemptive multitasking OS, this table is not - * protected from concurrent access. This hack doesn't work anyway on - * a protected system like OS/2. Use Microsoft C instead. - */ - -voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, unsigned items, unsigned size) -{ - voidpf buf = opaque; /* just to make some compilers happy */ - ulg bsize = (ulg)items*size; - - /* If we allocate less than 65520 bytes, we assume that farmalloc - * will return a usable pointer which doesn't have to be normalized. - */ - if (bsize < 65520L) { - buf = farmalloc(bsize); - if (*(ush*)&buf != 0) return buf; - } else { - buf = farmalloc(bsize + 16L); - } - if (buf == NULL || next_ptr >= MAX_PTR) return NULL; - table[next_ptr].org_ptr = buf; - - /* Normalize the pointer to seg:0 */ - *((ush*)&buf+1) += ((ush)((uch*)buf-0) + 15) >> 4; - *(ush*)&buf = 0; - table[next_ptr++].new_ptr = buf; - return buf; -} - -void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) -{ - int n; - if (*(ush*)&ptr != 0) { /* object < 64K */ - farfree(ptr); - return; - } - /* Find the original pointer */ - for (n = 0; n < next_ptr; n++) { - if (ptr != table[n].new_ptr) continue; - - farfree(table[n].org_ptr); - while (++n < next_ptr) { - table[n-1] = table[n]; - } - next_ptr--; - return; - } - ptr = opaque; /* just to make some compilers happy */ - Assert(0, "zcfree: ptr not found"); -} - -#endif /* __TURBOC__ */ - - -#ifdef M_I86 -/* Microsoft C in 16-bit mode */ - -# define MY_ZCALLOC - -#if (!defined(_MSC_VER) || (_MSC_VER <= 600)) -# define _halloc halloc -# define _hfree hfree -#endif - -voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, uInt items, uInt size) -{ - if (opaque) opaque = 0; /* to make compiler happy */ - return _halloc((long)items, size); -} - -void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr) -{ - if (opaque) opaque = 0; /* to make compiler happy */ - _hfree(ptr); -} - -#endif /* M_I86 */ - -#endif /* SYS16BIT */ - - -#ifndef MY_ZCALLOC /* Any system without a special alloc function */ - -#ifndef STDC -extern voidp malloc OF((uInt size)); -extern voidp calloc OF((uInt items, uInt size)); -extern void free OF((voidpf ptr)); -#endif - -voidpf ZLIB_INTERNAL zcalloc (opaque, items, size) - voidpf opaque; - unsigned items; - unsigned size; -{ - if (opaque) items += size - size; /* make compiler happy */ - return sizeof(uInt) > 2 ? 
(voidpf)malloc(items * size) : - (voidpf)calloc(items, size); -} - -void ZLIB_INTERNAL zcfree (opaque, ptr) - voidpf opaque; - voidpf ptr; -{ - free(ptr); - if (opaque) return; /* make compiler happy */ -} - -#endif /* MY_ZCALLOC */ - -#endif /* !Z_SOLO */ diff --git a/base/poco/Foundation/src/zutil.h b/base/poco/Foundation/src/zutil.h deleted file mode index 4deb4ec0db1..00000000000 --- a/base/poco/Foundation/src/zutil.h +++ /dev/null @@ -1,237 +0,0 @@ -/* zutil.h -- internal interface and configuration of the compression library - * Copyright (C) 1995-2013 Jean-loup Gailly. - * For conditions of distribution and use, see copyright notice in zlib.h - */ - -/* WARNING: this file should *not* be used by applications. It is - part of the implementation of the compression library and is - subject to change. Applications should only use zlib.h. - */ - -/* @(#) $Id: //poco/1.4/Foundation/src/zutil.h#3 $ */ - -#ifndef ZUTIL_H -#define ZUTIL_H - -#ifdef HAVE_HIDDEN -# define ZLIB_INTERNAL __attribute__((visibility("hidden"))) -#else -# define ZLIB_INTERNAL -#endif - -#include "zlib.h" - -#if defined(STDC) && !defined(Z_SOLO) -# include <stddef.h> -# include <string.h> -# include <stdlib.h> -#endif - -#ifdef Z_SOLO -typedef long ptrdiff_t; /* guess -- will be caught if guess is wrong */ -#endif - -#ifndef local -# define local static -#endif -/* compile with -Dlocal if your debugger can't find static symbols */ - -typedef unsigned char uch; -typedef uch FAR uchf; -typedef unsigned short ush; -typedef ush FAR ushf; -typedef unsigned long ulg; - -extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ -/* (size given to avoid silly warnings with Visual C++) */ - -#define ERR_MSG(err) z_errmsg[Z_NEED_DICT - (err)] - -#define ERR_RETURN(strm, err) return (strm->msg = ERR_MSG(err), (err)) -/* To be used only when the state is known to be valid */ - -/* common constants */ - -#ifndef DEF_WBITS -# define DEF_WBITS MAX_WBITS -#endif -/* default windowBits for decompression.
MAX_WBITS is for compression only */ - -#if MAX_MEM_LEVEL >= 8 -# define DEF_MEM_LEVEL 8 -#else -# define DEF_MEM_LEVEL MAX_MEM_LEVEL -#endif -/* default memLevel */ - -#define STORED_BLOCK 0 -#define STATIC_TREES 1 -#define DYN_TREES 2 -/* The three kinds of block type */ - -#define MIN_MATCH 3 -#define MAX_MATCH 258 -/* The minimum and maximum match lengths */ - -#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ - -/* target dependencies */ - -#ifdef AMIGA -# define OS_CODE 0x01 -#endif - -#if defined(VAXC) || defined(VMS) -# define OS_CODE 0x02 -# define F_OPEN(name, mode) fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") -#endif - -#if defined(ATARI) || defined(atarist) -# define OS_CODE 0x05 -#endif - -#ifdef OS2 -# define OS_CODE 0x06 -# if defined(M_I86) && !defined(Z_SOLO) -# include <malloc.h> -# endif -#endif - -#if defined(MACOS) || defined(TARGET_OS_MAC) -# define OS_CODE 0x07 -# ifndef Z_SOLO -# if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os -# include <unix.h> /* for fdopen */ -# else -# ifndef fdopen -# define fdopen(fd, mode) NULL /* No fdopen() */ -# endif -# endif -# endif -#endif - -#ifdef TOPS20 -# define OS_CODE 0x0a -#endif - -#ifdef WIN32 -# define OS_CODE 0x0b -#endif - -#ifdef __50SERIES /* Prime/PRIMOS */ -# define OS_CODE 0x0f -#endif - -#if defined(_BEOS_) || defined(RISCOS) -# define fdopen(fd, mode) NULL /* No fdopen() */ -#endif - - -/* provide prototypes for these when building zlib without LFS */ -#if !defined(_WIN32) && (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE - 0 == 0) -ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); -ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); -#endif - -/* common defaults */ - -#ifndef OS_CODE -# define OS_CODE 0x03 /* assume Unix */ -#endif - -#ifndef F_OPEN -# define F_OPEN(name, mode) fopen((name), (mode)) -#endif - -/* functions */ - -#if defined(pyr) || defined(Z_SOLO) -# define NO_MEMCPY -#endif -#if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) -/* Use our own functions for small and medium model with MSC <= 5.0. - * You may have to use the same strategy for Borland C (untested). - * The __SC__ check is for Symantec.
- */ -# define NO_MEMCPY -#endif -#if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) -# define HAVE_MEMCPY -#endif -#ifdef HAVE_MEMCPY -# ifdef SMALL_MEDIUM /* MS-DOS small or medium model */ -# define zmemcpy _fmemcpy -# define zmemcmp _fmemcmp -# define zmemzero(dest, len) _fmemset(dest, 0, len) -# else -# define zmemcpy memcpy -# define zmemcmp memcmp -# define zmemzero(dest, len) memset(dest, 0, len) -# endif -#else -void ZLIB_INTERNAL zmemcpy OF((Bytef * dest, const Bytef * source, uInt len)); -int ZLIB_INTERNAL zmemcmp OF((const Bytef * s1, const Bytef * s2, uInt len)); -void ZLIB_INTERNAL zmemzero OF((Bytef * dest, uInt len)); -#endif - -/* Diagnostic functions */ -#ifdef ZLIB_DEBUG -# include <stdio.h> -extern int ZLIB_INTERNAL z_verbose; -extern void ZLIB_INTERNAL z_error OF((char * m)); -# define Assert(cond, msg) \ - { \ - if (!(cond)) \ - z_error(msg); \ - } -# define Trace(x) \ - { \ - if (z_verbose >= 0) \ - fprintf x; \ - } -# define Tracev(x) \ - { \ - if (z_verbose > 0) \ - fprintf x; \ - } -# define Tracevv(x) \ - { \ - if (z_verbose > 1) \ - fprintf x; \ - } -# define Tracec(c, x) \ - { \ - if (z_verbose > 0 && (c)) \ - fprintf x; \ - } -# define Tracecv(c, x) \ - { \ - if (z_verbose > 1 && (c)) \ - fprintf x; \ - } -#else -# define Assert(cond, msg) -# define Trace(x) -# define Tracev(x) -# define Tracevv(x) -# define Tracec(c, x) -# define Tracecv(c, x) -#endif - -#ifndef Z_SOLO -voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items, unsigned size)); -void ZLIB_INTERNAL zcfree OF((voidpf opaque, voidpf ptr)); -#endif - -#define ZALLOC(strm, items, size) (*((strm)->zalloc))((strm)->opaque, (items), (size)) -#define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) -#define TRY_FREE(s, p) \ - { \ - if (p) \ - ZFREE(s, p); \ - } - -/* Reverse the bytes in a 32-bit value */ -#define ZSWAP32(q) ((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + (((q)&0xff00) << 8) + (((q)&0xff) << 24)) - -#endif /* ZUTIL_H */ diff --git a/base/poco/Net/include/Poco/Net/SMTPChannel.h b/base/poco/Net/include/Poco/Net/SMTPChannel.h deleted file mode index d913ccbc808..00000000000 --- a/base/poco/Net/include/Poco/Net/SMTPChannel.h +++ /dev/null @@ -1,109 +0,0 @@ -// -// SMTPChannel.h -// -// Library: Net -// Package: Logging -// Module: SMTPChannel -// -// Definition of the SMTPChannel class. -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Net_SMTPChannel_INCLUDED -#define Net_SMTPChannel_INCLUDED - - -#include "Poco/Channel.h" -#include "Poco/Net/Net.h" -#include "Poco/String.h" - - -namespace Poco -{ -namespace Net -{ - - - class Net_API SMTPChannel : public Poco::Channel - /// This Channel implements SMTP (email) logging. - { - public: - SMTPChannel(); - /// Creates a SMTPChannel. - - SMTPChannel(const std::string & mailhost, const std::string & sender, const std::string & recipient); - /// Creates a SMTPChannel with the given target mailhost, sender, and recipient. - - void open(); - /// Opens the SMTPChannel. - - void close(); - /// Closes the SMTPChannel. - - void log(const Message & msg); - /// Sends the message's text to the recipient. - - void setProperty(const std::string & name, const std::string & value); - /// Sets the property with the given value. - /// - /// The following properties are supported: - /// * mailhost: The SMTP server. Default is "localhost". - /// * sender: The sender address. - /// * recipient: The recipient address.
- /// * local: If true, local time is used. Default is true. - /// * attachment: Filename of the file to attach. - /// * type: Content type of the file to attach. - /// * delete: Boolean value indicating whether to delete - /// the attachment file after sending. - /// * throw: Boolean value indicating whether to throw - /// exception upon failure. - - std::string getProperty(const std::string & name) const; - /// Returns the value of the property with the given name. - - static void registerChannel(); - /// Registers the channel with the global LoggingFactory. - - static const std::string PROP_MAILHOST; - static const std::string PROP_SENDER; - static const std::string PROP_RECIPIENT; - static const std::string PROP_LOCAL; - static const std::string PROP_ATTACHMENT; - static const std::string PROP_TYPE; - static const std::string PROP_DELETE; - static const std::string PROP_THROW; - - protected: - ~SMTPChannel(); - - private: - bool isTrue(const std::string & value) const; - - std::string _mailHost; - std::string _sender; - std::string _recipient; - bool _local; - std::string _attachment; - std::string _type; - bool _delete; - bool _throw; - }; - - - inline bool SMTPChannel::isTrue(const std::string & value) const - { - return ( - (0 == icompare(value, "true")) || (0 == icompare(value, "t")) || (0 == icompare(value, "yes")) || (0 == icompare(value, "y"))); - } - - -} -} // namespace Poco::Net - - -#endif // Net_SMTPChannel_INCLUDED diff --git a/base/poco/Net/include/Poco/Net/SocketImpl.h b/base/poco/Net/include/Poco/Net/SocketImpl.h index e08d49be7a2..082f258fa98 100644 --- a/base/poco/Net/include/Poco/Net/SocketImpl.h +++ b/base/poco/Net/include/Poco/Net/SocketImpl.h @@ -399,9 +399,12 @@ namespace Net bool initialized() const; /// Returns true iff the underlying socket is initialized. + static void error(int code); + /// Throws an appropriate exception for the given error code. + protected: - SocketImpl(); - /// Creates a SocketImpl. + SocketImpl(); + /// Creates a SocketImpl. SocketImpl(poco_socket_t sockfd); /// Creates a SocketImpl using the given native socket. @@ -446,9 +449,6 @@ namespace Net static void error(const std::string & arg); /// Throws an appropriate exception for the last error. - static void error(int code); - /// Throws an appropriate exception for the given error code. - static void error(int code, const std::string & arg); /// Throws an appropriate exception for the given error code. diff --git a/base/poco/Net/src/SMTPChannel.cpp b/base/poco/Net/src/SMTPChannel.cpp deleted file mode 100644 index a498179a44e..00000000000 --- a/base/poco/Net/src/SMTPChannel.cpp +++ /dev/null @@ -1,210 +0,0 @@ -// -// SMTPChannel.cpp -// -// Library: Net -// Package: Logging -// Module: SMTPChannel -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. 
-// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Net/SMTPChannel.h" -#include "Poco/Net/MailMessage.h" -#include "Poco/Net/MailRecipient.h" -#include "Poco/Net/SMTPClientSession.h" -#include "Poco/Net/StringPartSource.h" -#include "Poco/Message.h" -#include "Poco/DateTimeFormatter.h" -#include "Poco/DateTimeFormat.h" -#include "Poco/LocalDateTime.h" -#include "Poco/LoggingFactory.h" -#include "Poco/Instantiator.h" -#include "Poco/NumberFormatter.h" -#include "Poco/FileStream.h" -#include "Poco/File.h" -#include "Poco/Environment.h" - - -namespace Poco { -namespace Net { - - -const std::string SMTPChannel::PROP_MAILHOST("mailhost"); -const std::string SMTPChannel::PROP_SENDER("sender"); -const std::string SMTPChannel::PROP_RECIPIENT("recipient"); -const std::string SMTPChannel::PROP_LOCAL("local"); -const std::string SMTPChannel::PROP_ATTACHMENT("attachment"); -const std::string SMTPChannel::PROP_TYPE("type"); -const std::string SMTPChannel::PROP_DELETE("delete"); -const std::string SMTPChannel::PROP_THROW("throw"); - - -SMTPChannel::SMTPChannel(): - _mailHost("localhost"), - _local(true), - _type("text/plain"), - _delete(false), - _throw(false) -{ -} - - -SMTPChannel::SMTPChannel(const std::string& mailhost, const std::string& sender, const std::string& recipient): - _mailHost(mailhost), - _sender(sender), - _recipient(recipient), - _local(true), - _type("text/plain"), - _delete(false), - _throw(false) -{ -} - - -SMTPChannel::~SMTPChannel() -{ - try - { - close(); - } - catch (...) - { - poco_unexpected(); - } -} - - -void SMTPChannel::open() -{ -} - - -void SMTPChannel::close() -{ -} - - -void SMTPChannel::log(const Message& msg) -{ - try - { - MailMessage message; - message.setSender(_sender); - message.addRecipient(MailRecipient(MailRecipient::PRIMARY_RECIPIENT, _recipient)); - message.setSubject("Log Message from " + _sender); - std::stringstream content; - content << "Log Message\r\n" - << "===========\r\n\r\n" - << "Host: " << Environment::nodeName() << "\r\n" - << "Logger: " << msg.getSource() << "\r\n"; - - if (_local) - { - DateTime dt(msg.getTime()); - content << "Timestamp: " << DateTimeFormatter::format(LocalDateTime(dt), DateTimeFormat::RFC822_FORMAT) << "\r\n"; - } - else - content << "Timestamp: " << DateTimeFormatter::format(msg.getTime(), DateTimeFormat::RFC822_FORMAT) << "\r\n"; - - content << "Priority: " << NumberFormatter::format(msg.getPriority()) << "\r\n" - << "Process ID: " << NumberFormatter::format(msg.getPid()) << "\r\n" - << "Thread: " << msg.getThread() << " (ID: " << msg.getTid() << ")\r\n" - << "Message text: " << msg.getText() << "\r\n\r\n"; - - message.addContent(new StringPartSource(content.str())); - - if (!_attachment.empty()) - { - { - Poco::FileInputStream fis(_attachment, std::ios::in | std::ios::binary | std::ios::ate); - if (fis.good()) - { - typedef std::allocator<std::string::value_type>::size_type SST; - - std::streamoff size = fis.tellg(); - poco_assert (std::numeric_limits<unsigned int>::max() >= size); - poco_assert (std::numeric_limits<SST>::max() >= size); - char* pMem = new char [static_cast<unsigned int>(size)]; - fis.seekg(std::ios::beg); - fis.read(pMem, size); - message.addAttachment(_attachment, - new StringPartSource(std::string(pMem, static_cast<SST>(size)), - _type, - _attachment)); - - delete [] pMem; - } - } - if (_delete) File(_attachment).remove(); - } - - SMTPClientSession session(_mailHost); - session.login(); - session.sendMessage(message); - session.close(); - } - catch (Exception&) - { - if (_throw) throw; - } -} - - -void SMTPChannel::setProperty(const std::string&
name, const std::string& value) -{ - if (name == PROP_MAILHOST) - _mailHost = value; - else if (name == PROP_SENDER) - _sender = value; - else if (name == PROP_RECIPIENT) - _recipient = value; - else if (name == PROP_LOCAL) - _local = isTrue(value); - else if (name == PROP_ATTACHMENT) - _attachment = value; - else if (name == PROP_TYPE) - _type = value; - else if (name == PROP_DELETE) - _delete = isTrue(value); - else if (name == PROP_THROW) - _throw = isTrue(value); - else - Channel::setProperty(name, value); -} - - -std::string SMTPChannel::getProperty(const std::string& name) const -{ - if (name == PROP_MAILHOST) - return _mailHost; - else if (name == PROP_SENDER) - return _sender; - else if (name == PROP_RECIPIENT) - return _recipient; - else if (name == PROP_LOCAL) - return _local ? "true" : "false"; - else if (name == PROP_ATTACHMENT) - return _attachment; - else if (name == PROP_TYPE) - return _type; - else if (name == PROP_DELETE) - return _delete ? "true" : "false"; - else if (name == PROP_THROW) - return _throw ? "true" : "false"; - else - return Channel::getProperty(name); -} - - -void SMTPChannel::registerChannel() -{ - Poco::LoggingFactory::defaultFactory().registerChannelClass("SMTPChannel", - new Poco::Instantiator<SMTPChannel, Poco::Channel>); -} - - -} } // namespace Poco::Net diff --git a/base/poco/Util/include/Poco/Util/ConfigurationMapper.h b/base/poco/Util/include/Poco/Util/ConfigurationMapper.h deleted file mode index dc1dd1fe86c..00000000000 --- a/base/poco/Util/include/Poco/Util/ConfigurationMapper.h +++ /dev/null @@ -1,97 +0,0 @@ -// -// ConfigurationMapper.h -// -// Library: Util -// Package: Configuration -// Module: ConfigurationMapper -// -// Definition of the ConfigurationMapper class. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Util_ConfigurationMapper_INCLUDED -#define Util_ConfigurationMapper_INCLUDED - - -#include "Poco/Util/AbstractConfiguration.h" -#include "Poco/Util/Util.h" - - -namespace Poco -{ -namespace Util -{ - - - class Util_API ConfigurationMapper : public AbstractConfiguration - /// This configuration maps a property hierarchy into another - /// hierarchy. - /// - /// For example, given a configuration with the following properties: - /// config.value1 - /// config.value2 - /// config.sub.value1 - /// config.sub.value2 - /// and a ConfigurationView with fromPrefix == "config" and toPrefix == "root.conf", then - /// the above properties will be available via the mapper as - /// root.conf.value1 - /// root.conf.value2 - /// root.conf.sub.value1 - /// root.conf.sub.value2 - /// - /// FromPrefix can be empty, in which case, and given toPrefix == "root", - /// the properties will be available as - /// root.config.value1 - /// root.config.value2 - /// root.config.sub.value1 - /// root.config.sub.value2 - /// - /// This is equivalent to the functionality of the ConfigurationView class. - /// - /// Similarly, toPrefix can also be empty. Given fromPrefix == "config" and - /// toPrefix == "", the properties will be available as - /// value1 - /// value2 - /// sub.value1 - /// sub.value2 - /// - /// If both fromPrefix and toPrefix are empty, no mapping is performed. - /// - /// A ConfigurationMapper is most useful in combination with a - /// LayeredConfiguration. - { - public: - ConfigurationMapper(const std::string & fromPrefix, const std::string & toPrefix, AbstractConfiguration * pConfig); - /// Creates the ConfigurationMapper.
The ConfigurationMapper does not take - /// ownership of the passed configuration. - - protected: - bool getRaw(const std::string & key, std::string & value) const; - void setRaw(const std::string & key, const std::string & value); - void enumerate(const std::string & key, Keys & range) const; - void removeRaw(const std::string & key); - - std::string translateKey(const std::string & key) const; - - ~ConfigurationMapper(); - - private: - ConfigurationMapper(const ConfigurationMapper &); - ConfigurationMapper & operator=(const ConfigurationMapper &); - - std::string _fromPrefix; - std::string _toPrefix; - AbstractConfiguration * _pConfig; - }; - - -} -} // namespace Poco::Util - - -#endif // Util_ConfigurationMapper_INCLUDED diff --git a/base/poco/Util/include/Poco/Util/WinRegistryConfiguration.h b/base/poco/Util/include/Poco/Util/WinRegistryConfiguration.h deleted file mode 100644 index 43dc34f630b..00000000000 --- a/base/poco/Util/include/Poco/Util/WinRegistryConfiguration.h +++ /dev/null @@ -1,75 +0,0 @@ -// -// WinRegistryConfiguration.h -// -// Library: Util -// Package: Windows -// Module: WinRegistryConfiguration -// -// Definition of the WinRegistryConfiguration class. -// -// Copyright (c) 2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Util_WinRegistryConfiguration_INCLUDED -#define Util_WinRegistryConfiguration_INCLUDED - - -#include "Poco/String.h" -#include "Poco/Util/AbstractConfiguration.h" -#include "Poco/Util/Util.h" - - -namespace Poco -{ -namespace Util -{ - - - class Util_API WinRegistryConfiguration : public AbstractConfiguration - /// An implementation of AbstractConfiguration that stores configuration data - /// in the Windows registry. - /// - /// Removing key is not supported. An attempt to remove a key results - /// in a NotImplementedException being thrown. - { - public: - WinRegistryConfiguration(const std::string & rootPath, REGSAM extraSam = 0); - /// Creates the WinRegistryConfiguration. - /// The rootPath must start with one of the root key names - /// like HKEY_CLASSES_ROOT, e.g. HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services. - /// All further keys are relative to the root path and can be - /// dot separated, e.g. the path MyService.ServiceName will be converted to - /// HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\MyService\ServiceName. - /// The extraSam parameter will be passed along to WinRegistryKey, to control - /// registry virtualization for example. - - protected: - ~WinRegistryConfiguration(); - /// Destroys the WinRegistryConfiguration. 
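// A brief usage sketch for context (the root path and key names below are
// hypothetical examples, not taken from the original source); dot-separated
// keys are resolved relative to the configured root path by the protected
// helpers that follow:
//
//   Poco::AutoPtr<Poco::Util::WinRegistryConfiguration> pConfig(
//       new Poco::Util::WinRegistryConfiguration(
//           "HKEY_LOCAL_MACHINE\\SYSTEM\\CurrentControlSet\\Services"));
//   // reads HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services\MyService\DisplayName
//   std::string displayName = pConfig->getString("MyService.DisplayName");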
- - bool getRaw(const std::string & key, std::string & value) const; - void setRaw(const std::string & key, const std::string & value); - void enumerate(const std::string & key, Keys & range) const; - void removeRaw(const std::string & key); - - std::string convertToRegFormat(const std::string & key, std::string & keyName) const; - /// Takes a key in the format of A.B.C and converts it to - /// registry format A\B\C, the last entry is the keyName, the rest is returned as path - - friend class WinConfigurationTest; - - private: - std::string _rootPath; - REGSAM _extraSam; - }; - - -} -} // namespace Poco::Util - - -#endif // Util_WinRegistryConfiguration_INCLUDED diff --git a/base/poco/Util/include/Poco/Util/WinRegistryKey.h b/base/poco/Util/include/Poco/Util/WinRegistryKey.h deleted file mode index 9aa5e35ed8a..00000000000 --- a/base/poco/Util/include/Poco/Util/WinRegistryKey.h +++ /dev/null @@ -1,199 +0,0 @@ -// -// WinRegistryKey.h -// -// Library: Util -// Package: Windows -// Module: WinRegistryKey -// -// Definition of the WinRegistryKey class. -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Util_WinRegistryKey_INCLUDED -#define Util_WinRegistryKey_INCLUDED - - -#include <vector> -#include "Poco/UnWindows.h" -#include "Poco/Util/Util.h" - - -namespace Poco -{ -namespace Util -{ - - - class Util_API WinRegistryKey - /// This class implements a convenient interface to the - /// Windows Registry. - /// - /// This class is only available on Windows platforms. - { - public: - typedef std::vector<std::string> Keys; - typedef std::vector<std::string> Values; - - enum Type - { - REGT_NONE = 0, - REGT_STRING = 1, - REGT_STRING_EXPAND = 2, - REGT_BINARY = 3, - REGT_DWORD = 4, - REGT_DWORD_BIG_ENDIAN = 5, - REGT_LINK = 6, - REGT_MULTI_STRING = 7, - REGT_RESOURCE_LIST = 8, - REGT_FULL_RESOURCE_DESCRIPTOR = 9, - REGT_RESOURCE_REQUIREMENTS_LIST = 10, - REGT_QWORD = 11 - }; - - WinRegistryKey(const std::string & key, bool readOnly = false, REGSAM extraSam = 0); - /// Creates the WinRegistryKey. - /// - /// The key must start with one of the root key names - /// like HKEY_CLASSES_ROOT, e.g. HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Services. - /// - /// If readOnly is true, then only read access to the registry - /// is available and any attempt to write to the registry will - /// result in an exception. - /// - /// extraSam is used to pass extra flags (in addition to KEY_READ and KEY_WRITE) - /// to the samDesired argument of RegOpenKeyEx() or RegCreateKeyEx(). - - WinRegistryKey(HKEY hRootKey, const std::string & subKey, bool readOnly = false, REGSAM extraSam = 0); - /// Creates the WinRegistryKey. - /// - /// If readOnly is true, then only read access to the registry - /// is available and any attempt to write to the registry will - /// result in an exception. - /// - /// extraSam is used to pass extra flags (in addition to KEY_READ and KEY_WRITE) - /// to the samDesired argument of RegOpenKeyEx() or RegCreateKeyEx(). - - ~WinRegistryKey(); - /// Destroys the WinRegistryKey. - - void setString(const std::string & name, const std::string & value); - /// Sets the string value (REG_SZ) with the given name. - /// An empty name denotes the default value. - - std::string getString(const std::string & name); - /// Returns the string value (REG_SZ) with the given name. - /// An empty name denotes the default value. - /// - /// Throws a NotFoundException if the value does not exist.
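// A short usage sketch for the accessors declared here (the key path and
// value name are hypothetical examples, not from the original source):
//
//   Poco::Util::WinRegistryKey regKey("HKEY_LOCAL_MACHINE\\Software\\Example", true /*readOnly*/);
//   if (regKey.exists() && regKey.exists("InstallPath"))
//   {
//       std::string installPath = regKey.getString("InstallPath");
//   }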
- - void setStringExpand(const std::string & name, const std::string & value); - /// Sets the expandable string value (REG_EXPAND_SZ) with the given name. - /// An empty name denotes the default value. - - std::string getStringExpand(const std::string & name); - /// Returns the string value (REG_EXPAND_SZ) with the given name. - /// An empty name denotes the default value. - /// All references to environment variables (%VAR%) in the string - /// are expanded. - /// - /// Throws a NotFoundException if the value does not exist. - - void setBinary(const std::string & name, const std::vector<char> & value); - /// Sets the string value (REG_BINARY) with the given name. - /// An empty name denotes the default value. - - std::vector<char> getBinary(const std::string & name); - /// Returns the string value (REG_BINARY) with the given name. - /// An empty name denotes the default value. - /// - /// Throws a NotFoundException if the value does not exist. - - void setInt(const std::string & name, int value); - /// Sets the numeric (REG_DWORD) value with the given name. - /// An empty name denotes the default value. - - int getInt(const std::string & name); - /// Returns the numeric value (REG_DWORD) with the given name. - /// An empty name denotes the default value. - /// - /// Throws a NotFoundException if the value does not exist. - - - void setInt64(const std::string & name, Poco::Int64 value); - /// Sets the numeric (REG_QWORD) value with the given name. - /// An empty name denotes the default value. - - Poco::Int64 getInt64(const std::string & name); - /// Returns the numeric value (REG_QWORD) with the given name. - /// An empty name denotes the default value. - /// - /// Throws a NotFoundException if the value does not exist. - - - void deleteValue(const std::string & name); - /// Deletes the value with the given name. - /// - /// Throws a NotFoundException if the value does not exist. - - void deleteKey(); - /// Recursively deletes the key and all subkeys. - - bool exists(); - /// Returns true iff the key exists. - - Type type(const std::string & name); - /// Returns the type of the key value. - - bool exists(const std::string & name); - /// Returns true iff the given value exists under that key. - - void subKeys(Keys & keys); - /// Appends all subKey names to keys. - - void values(Values & vals); - /// Appends all value names to vals; - - bool isReadOnly() const; - /// Returns true iff the key has been opened for read-only access only. - - protected: - void open(); - void close(); - std::string key() const; - std::string key(const std::string & valueName) const; - HKEY handle(); - void handleSetError(const std::string & name); - static HKEY handleFor(const std::string & rootKey); - - private: - WinRegistryKey(); - WinRegistryKey(const WinRegistryKey &); - WinRegistryKey & operator=(const WinRegistryKey &); - - HKEY _hRootKey; - std::string _subKey; - HKEY _hKey; - bool _readOnly; - REGSAM _extraSam; - }; - - - // - // inlines - // - inline bool WinRegistryKey::isReadOnly() const - { - return _readOnly; - } - - -} -} // namespace Poco::Util - - -#endif // Util_WinRegistryKey_INCLUDED diff --git a/base/poco/Util/include/Poco/Util/WinService.h b/base/poco/Util/include/Poco/Util/WinService.h deleted file mode index 52377dfb67b..00000000000 --- a/base/poco/Util/include/Poco/Util/WinService.h +++ /dev/null @@ -1,140 +0,0 @@ -// -// WinService.h -// -// Library: Util -// Package: Windows -// Module: WinService -// -// Definition of the WinService class.
-// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Util_WinService_INCLUDED -#define Util_WinService_INCLUDED - - -#include "Poco/UnWindows.h" -#include "Poco/Util/Util.h" - - -# define POCO_LPQUERY_SERVICE_CONFIG LPQUERY_SERVICE_CONFIGA - - -namespace Poco -{ -namespace Util -{ - - - class Util_API WinService - /// This class provides an object-oriented interface to - /// the Windows Service Control Manager for registering, - /// unregistering, configuring, starting and stopping - /// services. - /// - /// This class is only available on Windows platforms. - { - public: - enum Startup - { - SVC_AUTO_START, - SVC_MANUAL_START, - SVC_DISABLED - }; - - WinService(const std::string & name); - /// Creates the WinService, using the given service name. - - ~WinService(); - /// Destroys the WinService. - - const std::string & name() const; - /// Returns the service name. - - std::string displayName() const; - /// Returns the service's display name. - - std::string path() const; - /// Returns the path to the service executable. - /// - /// Throws a NotFoundException if the service has not been registered. - - void registerService(const std::string & path, const std::string & displayName); - /// Creates a Windows service with the executable specified by path - /// and the given displayName. - /// - /// Throws a ExistsException if the service has already been registered. - - void registerService(const std::string & path); - /// Creates a Windows service with the executable specified by path - /// and the given displayName. The service name is used as display name. - /// - /// Throws a ExistsException if the service has already been registered. - - void unregisterService(); - /// Deletes the Windows service. - /// - /// Throws a NotFoundException if the service has not been registered. - - bool isRegistered() const; - /// Returns true if the service has been registered with the Service Control Manager. - - bool isRunning() const; - /// Returns true if the service is currently running. - - void start(); - /// Starts the service. - /// Does nothing if the service is already running. - /// - /// Throws a NotFoundException if the service has not been registered. - - void stop(); - /// Stops the service. - /// Does nothing if the service is not running. - /// - /// Throws a NotFoundException if the service has not been registered. - - void setStartup(Startup startup); - /// Sets the startup mode for the service. - - Startup getStartup() const; - /// Returns the startup mode for the service. - - void setDescription(const std::string & description); - /// Sets the service description in the registry. - - std::string getDescription() const; - /// Returns the service description from the registry. 
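// A short usage sketch combining the operations declared above (the service
// name and executable path are hypothetical examples, not from the original
// source):
//
//   Poco::Util::WinService service("ExampleService");
//   if (!service.isRegistered())
//       service.registerService("C:\\Example\\example.exe");
//   service.setStartup(Poco::Util::WinService::SVC_AUTO_START);
//   if (!service.isRunning())
//       service.start();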
- - static const int STARTUP_TIMEOUT; - - protected: - static const std::string REGISTRY_KEY; - static const std::string REGISTRY_DESCRIPTION; - - private: - void open() const; - bool tryOpen() const; - void close() const; - POCO_LPQUERY_SERVICE_CONFIG config() const; - - WinService(); - WinService(const WinService &); - WinService & operator=(const WinService &); - - std::string _name; - SC_HANDLE _scmHandle; - mutable SC_HANDLE _svcHandle; - }; - - -} -} // namespace Poco::Util - - -#endif // Util_WinService_INCLUDED diff --git a/base/poco/Util/src/ConfigurationMapper.cpp b/base/poco/Util/src/ConfigurationMapper.cpp deleted file mode 100644 index d76f9c0b6da..00000000000 --- a/base/poco/Util/src/ConfigurationMapper.cpp +++ /dev/null @@ -1,101 +0,0 @@ -// -// ConfigurationMapper.cpp -// -// Library: Util -// Package: Configuration -// Module: ConfigurationMapper -// -// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#include "Poco/Util/ConfigurationMapper.h" - - -namespace Poco { -namespace Util { - - -ConfigurationMapper::ConfigurationMapper(const std::string& fromPrefix, const std::string& toPrefix, AbstractConfiguration* pConfig): - _fromPrefix(fromPrefix), - _toPrefix(toPrefix), - _pConfig(pConfig) -{ - poco_check_ptr (pConfig); - - if (!_fromPrefix.empty()) _fromPrefix += '.'; - if (!_toPrefix.empty()) _toPrefix += '.'; - - _pConfig->duplicate(); -} - - -ConfigurationMapper::~ConfigurationMapper() -{ - _pConfig->release(); -} - - -bool ConfigurationMapper::getRaw(const std::string& key, std::string& value) const -{ - std::string translatedKey = translateKey(key); - return _pConfig->getRaw(translatedKey, value); -} - - -void ConfigurationMapper::setRaw(const std::string& key, const std::string& value) -{ - std::string translatedKey = translateKey(key); - _pConfig->setRaw(translatedKey, value); -} - - -void ConfigurationMapper::enumerate(const std::string& key, Keys& range) const -{ - std::string cKey(key); - if (!cKey.empty()) cKey += '.'; - std::string::size_type keyLen = cKey.length(); - if (keyLen < _toPrefix.length()) - { - if (_toPrefix.compare(0, keyLen, cKey) == 0) - { - std::string::size_type pos = _toPrefix.find_first_of('.', keyLen); - poco_assert_dbg(pos != std::string::npos); - range.push_back(_toPrefix.substr(keyLen, pos - keyLen)); - } - } - else - { - std::string translatedKey; - if (cKey == _toPrefix) - { - translatedKey = _fromPrefix; - if (!translatedKey.empty()) - translatedKey.resize(translatedKey.length() - 1); - } - else translatedKey = translateKey(key); - _pConfig->enumerate(translatedKey, range); - } -} - - -void ConfigurationMapper::removeRaw(const std::string& key) -{ - std::string translatedKey = translateKey(key); - _pConfig->remove(translatedKey); -} - - -std::string ConfigurationMapper::translateKey(const std::string& key) const -{ - std::string result(key); - if (result.compare(0, _toPrefix.size(), _toPrefix) == 0) - result.replace(0, _toPrefix.size(), _fromPrefix); - return result; -} - - -} } // namespace Poco::Util diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 9bb148c12a9..462529fbc13 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54473) +SET(VERSION_REVISION 54474) SET(VERSION_MAJOR 23) -SET(VERSION_MINOR 4) +SET(VERSION_MINOR 5) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 46e85357ce2da2a99f56ee83a079e892d7ec3726) -SET(VERSION_DESCRIBE v23.4.1.1-testing) -SET(VERSION_STRING 23.4.1.1) +SET(VERSION_GITHASH 3920eb987f7ed837ada5de8907284adf123f0583) +SET(VERSION_DESCRIBE v23.5.1.1-testing) +SET(VERSION_STRING 23.5.1.1) # end of autochange diff --git a/cmake/darwin/default_libs.cmake b/cmake/darwin/default_libs.cmake index 3e6e4907a71..812847e6201 100644 --- a/cmake/darwin/default_libs.cmake +++ b/cmake/darwin/default_libs.cmake @@ -2,13 +2,6 @@ set (DEFAULT_LIBS "-nodefaultlibs") set (DEFAULT_LIBS "${DEFAULT_LIBS} ${COVERAGE_OPTION} -lc -lm -lpthread -ldl") -if (COMPILER_GCC) - set (DEFAULT_LIBS "${DEFAULT_LIBS} -lgcc_eh") - if (ARCH_AARCH64) - set (DEFAULT_LIBS "${DEFAULT_LIBS} -lgcc") - endif () -endif () - message(STATUS "Default libraries: ${DEFAULT_LIBS}") set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS}) diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake index 3e1f22ef2e4..65bf296ee09 100644 --- a/cmake/freebsd/default_libs.cmake +++ b/cmake/freebsd/default_libs.cmake @@ -1,9 +1,5 @@ set (DEFAULT_LIBS "-nodefaultlibs") -if (NOT COMPILER_CLANG) - message (FATAL_ERROR "FreeBSD build is supported only for Clang") -endif () - if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "amd64") execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-x86_64.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE) else () diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 23c5fc3e14f..d42d587303a 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -11,8 +11,6 @@ if (COMPILER_CLANG) if (NOT EXISTS "${BUILTINS_LIBRARY}") set (BUILTINS_LIBRARY "-lgcc") endif () -else () - set (BUILTINS_LIBRARY "-lgcc") endif () if (OS_ANDROID) diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake index cf7f7606618..fc9793d8f35 100644 --- a/cmake/sanitize.cmake +++ b/cmake/sanitize.cmake @@ -8,12 +8,6 @@ option (SANITIZE "Enable one of the code sanitizers" "") set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER") -# gcc with -nodefaultlibs does not add sanitizer libraries -# with -static-libasan and similar -macro(add_explicit_sanitizer_library lib) - target_link_libraries(global-libs INTERFACE "-Wl,-static -l${lib} -Wl,-Bdynamic") -endmacro() - if (SANITIZE) if (SANITIZE STREQUAL "address") # LLVM-15 has a bug in Address Sanitizer, preventing the usage of 'sanitize-address-use-after-scope', @@ -28,9 +22,6 @@ if (SANITIZE) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libasan") endif () - if (COMPILER_GCC) - add_explicit_sanitizer_library(asan) - endif() elseif (SANITIZE STREQUAL "memory") # MemorySanitizer flags are set according to the official documentation: @@ -42,8 +33,7 @@ if (SANITIZE) # RelWithDebInfo, and downgrade optimizations to -O1 but not to -Og, to # keep the binary size down. # TODO: try compiling with -Og and with ld.gold. 
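# (Annotation added for clarity; not part of the original patch, and the rationale is an inference.)
# The change just below appends -fPIC -fpie to MSAN_FLAGS: MemorySanitizer's documentation
# recommends building instrumented code as position-independent and linking it as PIE so the
# runtime can reserve and shadow the address ranges it expects, which is presumably why these
# flags are added alongside the existing -fsanitize=memory options.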
- set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt") - + set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-use-after-dtor -fsanitize-memory-track-origins -fno-optimize-sibling-calls -fPIC -fpie -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/msan_suppressions.txt") set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}") set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}") @@ -58,11 +48,6 @@ if (SANITIZE) set (TSAN_FLAGS "-fsanitize=thread") if (COMPILER_CLANG) set (TSAN_FLAGS "${TSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt") - else() - set (MESSAGE "TSAN suppressions was not passed to the compiler (since the compiler is not clang)\n") - set (MESSAGE "${MESSAGE}Use the following command to pass them manually:\n") - set (MESSAGE "${MESSAGE} export TSAN_OPTIONS=\"$TSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/tsan_suppressions.txt\"") - message (WARNING "${MESSAGE}") endif() @@ -74,9 +59,6 @@ if (SANITIZE) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libtsan") endif () - if (COMPILER_GCC) - add_explicit_sanitizer_library(tsan) - endif() elseif (SANITIZE STREQUAL "undefined") set (UBSAN_FLAGS "-fsanitize=undefined -fno-sanitize-recover=all -fno-sanitize=float-divide-by-zero") @@ -91,11 +73,6 @@ if (SANITIZE) endif() if (COMPILER_CLANG) set (UBSAN_FLAGS "${UBSAN_FLAGS} -fsanitize-blacklist=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt") - else() - set (MESSAGE "UBSAN suppressions was not passed to the compiler (since the compiler is not clang)\n") - set (MESSAGE "${MESSAGE}Use the following command to pass them manually:\n") - set (MESSAGE "${MESSAGE} export UBSAN_OPTIONS=\"$UBSAN_OPTIONS suppressions=${CMAKE_SOURCE_DIR}/tests/ubsan_suppressions.txt\"") - message (WARNING "${MESSAGE}") endif() set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${UBSAN_FLAGS}") @@ -106,9 +83,6 @@ if (SANITIZE) if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libubsan") endif () - if (COMPILER_GCC) - add_explicit_sanitizer_library(ubsan) - endif() # llvm-tblgen, that is used during LLVM build, doesn't work with UBSan. set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "") diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 3572134d89f..8e6da1051bc 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -1,8 +1,6 @@ # Compiler -if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - set (COMPILER_GCC 1) -elseif (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") +if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") set (COMPILER_CLANG 1) # Safe to treat AppleClang as a regular Clang, in general. elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang") set (COMPILER_CLANG 1) @@ -18,16 +16,8 @@ message (STATUS "Using compiler:\n${COMPILER_SELF_IDENTIFICATION}") set (CLANG_MINIMUM_VERSION 15) set (XCODE_MINIMUM_VERSION 12.0) set (APPLE_CLANG_MINIMUM_VERSION 12.0.0) -set (GCC_MINIMUM_VERSION 11) -if (COMPILER_GCC) - message (FATAL_ERROR "Compilation with GCC is unsupported. 
Please use Clang instead.") - - if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${GCC_MINIMUM_VERSION}) - message (FATAL_ERROR "Compilation with GCC version ${CMAKE_CXX_COMPILER_VERSION} is unsupported, the minimum required version is ${GCC_MINIMUM_VERSION}.") - endif () - -elseif (COMPILER_CLANG) +if (COMPILER_CLANG) if (CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") # (Experimental!) Specify "-DALLOW_APPLECLANG=ON" when running CMake configuration step, if you want to experiment with using it. if (NOT ALLOW_APPLECLANG AND NOT DEFINED ENV{ALLOW_APPLECLANG}) @@ -58,9 +48,7 @@ if (LINKER_NAME MATCHES "gold") endif () if (NOT LINKER_NAME) - if (COMPILER_GCC) - find_program (LLD_PATH NAMES "ld.lld") - elseif (COMPILER_CLANG) + if (COMPILER_CLANG) if (OS_LINUX) if (NOT ARCH_S390X) # s390x doesnt support lld find_program (LLD_PATH NAMES "ld.lld-${COMPILER_VERSION_MAJOR}" "ld.lld") @@ -69,10 +57,7 @@ if (NOT LINKER_NAME) endif () if (OS_LINUX) if (LLD_PATH) - if (COMPILER_GCC) - # GCC driver requires one of supported linker names like "lld". - set (LINKER_NAME "lld") - else () + if (COMPILER_CLANG) # Clang driver simply allows full linker path. set (LINKER_NAME ${LLD_PATH}) endif () @@ -91,8 +76,6 @@ if (LINKER_NAME) configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}") - else () - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}") endif () endif () @@ -105,9 +88,7 @@ endif() # Archiver -if (COMPILER_GCC) - find_program (LLVM_AR_PATH NAMES "llvm-ar" "llvm-ar-15" "llvm-ar-14" "llvm-ar-13" "llvm-ar-12") -else () +if (COMPILER_CLANG) find_program (LLVM_AR_PATH NAMES "llvm-ar-${COMPILER_VERSION_MAJOR}" "llvm-ar") endif () @@ -119,9 +100,7 @@ message(STATUS "Using archiver: ${CMAKE_AR}") # Ranlib -if (COMPILER_GCC) - find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib" "llvm-ranlib-15" "llvm-ranlib-14" "llvm-ranlib-13" "llvm-ranlib-12") -else () +if (COMPILER_CLANG) find_program (LLVM_RANLIB_PATH NAMES "llvm-ranlib-${COMPILER_VERSION_MAJOR}" "llvm-ranlib") endif () @@ -133,9 +112,7 @@ message(STATUS "Using ranlib: ${CMAKE_RANLIB}") # Install Name Tool -if (COMPILER_GCC) - find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool" "llvm-install-name-tool-15" "llvm-install-name-tool-14" "llvm-install-name-tool-13" "llvm-install-name-tool-12") -else () +if (COMPILER_CLANG) find_program (LLVM_INSTALL_NAME_TOOL_PATH NAMES "llvm-install-name-tool-${COMPILER_VERSION_MAJOR}" "llvm-install-name-tool") endif () @@ -147,9 +124,7 @@ message(STATUS "Using install-name-tool: ${CMAKE_INSTALL_NAME_TOOL}") # Objcopy -if (COMPILER_GCC) - find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-15" "llvm-objcopy-14" "llvm-objcopy-13" "llvm-objcopy-12" "objcopy") -else () +if (COMPILER_CLANG) find_program (OBJCOPY_PATH NAMES "llvm-objcopy-${COMPILER_VERSION_MAJOR}" "llvm-objcopy" "objcopy") endif () @@ -161,9 +136,7 @@ endif () # Strip -if (COMPILER_GCC) - find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-15" "llvm-strip-14" "llvm-strip-13" "llvm-strip-12" "strip") -else () +if (COMPILER_CLANG) find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip") endif () diff --git a/cmake/warnings.cmake b/cmake/warnings.cmake index 5d116b199cf..00fa32a6b7f 100644 --- a/cmake/warnings.cmake +++ b/cmake/warnings.cmake @@ -47,115 +47,4 @@ if (COMPILER_CLANG) no_warning(enum-constexpr-conversion) # breaks magic-enum library in clang-16 
no_warning(unsafe-buffer-usage) # too aggressive # TODO Enable conversion, sign-conversion, double-promotion warnings. -elseif (COMPILER_GCC) - # Add compiler options only to c++ compiler - function(add_cxx_compile_options option) - add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${option}>") - endfunction() - # Warn about boolean expression compared with an integer value different from true/false - add_cxx_compile_options(-Wbool-compare) - # Warn whenever a pointer is cast such that the required alignment of the target is increased. - add_cxx_compile_options(-Wcast-align) - # Warn whenever a pointer is cast so as to remove a type qualifier from the target type. - add_cxx_compile_options(-Wcast-qual) - # Warn when deleting a pointer to incomplete type, which may cause undefined behavior at runtime - add_cxx_compile_options(-Wdelete-incomplete) - # Warn if a requested optimization pass is disabled. Code is too big or too complex - add_cxx_compile_options(-Wdisabled-optimization) - # Warn about duplicated conditions in an if-else-if chain - add_cxx_compile_options(-Wduplicated-cond) - # Warn about a comparison between values of different enumerated types - add_cxx_compile_options(-Wenum-compare) - # Warn about uninitialized variables that are initialized with themselves - add_cxx_compile_options(-Winit-self) - # Warn about logical not used on the left hand side operand of a comparison - add_cxx_compile_options(-Wlogical-not-parentheses) - # Warn about suspicious uses of logical operators in expressions - add_cxx_compile_options(-Wlogical-op) - # Warn if there exists a path from the function entry to a use of the variable that is uninitialized. - add_cxx_compile_options(-Wmaybe-uninitialized) - # Warn when the indentation of the code does not reflect the block structure - add_cxx_compile_options(-Wmisleading-indentation) - # Warn if a global function is defined without a previous declaration - disabled because of build times - # add_cxx_compile_options(-Wmissing-declarations) - # Warn if a user-supplied include directory does not exist - add_cxx_compile_options(-Wmissing-include-dirs) - # Obvious - add_cxx_compile_options(-Wnon-virtual-dtor) - # Obvious - add_cxx_compile_options(-Wno-return-local-addr) - # This warning is disabled due to false positives if compiled with libc++: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90037 - #add_cxx_compile_options(-Wnull-dereference) - # Obvious - add_cxx_compile_options(-Wodr) - # Obvious - add_cxx_compile_options(-Wold-style-cast) - # Warn when a function declaration hides virtual functions from a base class - # add_cxx_compile_options(-Woverloaded-virtual) - # Warn about placement new expressions with undefined behavior - add_cxx_compile_options(-Wplacement-new=2) - # Warn about anything that depends on the “size of” a function type or of void - add_cxx_compile_options(-Wpointer-arith) - # Warn if anything is declared more than once in the same scope - add_cxx_compile_options(-Wredundant-decls) - # Member initialization reordering - add_cxx_compile_options(-Wreorder) - # Obvious - add_cxx_compile_options(-Wshadow) - # Warn if left shifting a negative value - add_cxx_compile_options(-Wshift-negative-value) - # Warn about a definition of an unsized deallocation function - add_cxx_compile_options(-Wsized-deallocation) - # Warn when the sizeof operator is applied to a parameter that is declared as an array in a function definition - add_cxx_compile_options(-Wsizeof-array-argument) - # Warn for suspicious length parameters to certain string and memory built-in functions
if the argument uses sizeof - add_cxx_compile_options(-Wsizeof-pointer-memaccess) - # Warn about overriding virtual functions that are not marked with the override keyword - add_cxx_compile_options(-Wsuggest-override) - # Warn whenever a switch statement has an index of boolean type and the case values are outside the range of a boolean type - add_cxx_compile_options(-Wswitch-bool) - # Warn if a self-comparison always evaluates to true or false - add_cxx_compile_options(-Wtautological-compare) - # Warn about trampolines generated for pointers to nested functions - add_cxx_compile_options(-Wtrampolines) - # Obvious - add_cxx_compile_options(-Wunused) - add_cxx_compile_options(-Wundef) - # Warn if vector operation is not implemented via SIMD capabilities of the architecture - add_cxx_compile_options(-Wvector-operation-performance) - # Warn when a literal 0 is used as null pointer constant. - add_cxx_compile_options(-Wzero-as-null-pointer-constant) - - # The following warnings are generally useful but had to be disabled because of compiler bugs with older GCCs. - # XXX: We should try again on more recent GCCs (--> see CMake variable GCC_MINIMUM_VERSION). - - # gcc10 stuck with this option while compiling GatherUtils code, anyway there are builds with clang that will warn - add_cxx_compile_options(-Wno-sequence-point) - # gcc10 false positive with this warning in MergeTreePartition.cpp - # inlined from 'void writeHexByteLowercase(UInt8, void*)' at ../src/Common/hex.h:39:11, - # inlined from 'DB::String DB::MergeTreePartition::getID(const DB::Block&) const' at ../src/Storages/MergeTree/MergeTreePartition.cpp:85:30: - # ../contrib/libc-headers/x86_64-linux-gnu/bits/string_fortified.h:34:33: error: writing 2 bytes into a region of size 0 [-Werror=stringop-overflow=] - # 34 | return __builtin___memcpy_chk (__dest, __src, __len, __bos0 (__dest)); - # For some reason (bug in gcc?) macro 'GCC diagnostic ignored "-Wstringop-overflow"' doesn't help. - add_cxx_compile_options(-Wno-stringop-overflow) - # reinterpretAs.cpp:182:31: error: ‘void* memcpy(void*, const void*, size_t)’ copying an object of non-trivial type - # ‘using ToFieldType = using FieldType = using UUID = struct StrongTypedef<wide_integer<128, unsigned int>, DB::UUIDTag>’ - # {aka ‘struct StrongTypedef<wide_integer<128, unsigned int>, DB::UUIDTag>’} from an array of ‘const char8_t’ - add_cxx_compile_options(-Wno-error=class-memaccess) - # Maybe false positive... - # In file included from /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/memory:673, - # In function ‘void std::__1::__libcpp_operator_delete(_Args ...) [with _Args = {void*, long unsigned int}]’, - # inlined from ‘void std::__1::__do_deallocate_handle_size(void*, size_t, _Args ...)
[with _Args = {}]’ at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:271:34, - # inlined from ‘void std::__1::__libcpp_deallocate(void*, size_t, size_t)’ at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:285:41, - # inlined from ‘constexpr void std::__1::allocator<_Tp>::deallocate(_Tp*, size_t) [with _Tp = char]’ at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/memory:849:39, - # inlined from ‘static constexpr void std::__1::allocator_traits<_Alloc>::deallocate(std::__1::allocator_traits<_Alloc>::allocator_type&, std::__1::allocator_traits<_Alloc>::pointer, std::__1::allocator_traits<_Alloc>::size_type) [with _Alloc = std::__1::allocator<char>]’ at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/__memory/allocator_traits.h:476:24, - # inlined from ‘std::__1::basic_string<_CharT, _Traits, _Allocator>::~basic_string() [with _CharT = char; _Traits = std::__1::char_traits<char>; _Allocator = std::__1::allocator<char>]’ at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/string:2219:35, - # inlined from ‘std::__1::basic_string<_CharT, _Traits, _Allocator>::~basic_string() [with _CharT = char; _Traits = std::__1::char_traits<char>; _Allocator = std::__1::allocator<char>]’ at /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/string:2213:1, - # inlined from ‘DB::JSONBuilder::JSONMap::Pair::~Pair()’ at /home/jakalletti/ClickHouse/ClickHouse/src/Common/JSONBuilder.h:90:12, - # inlined from ‘void DB::JSONBuilder::JSONMap::add(std::__1::string, DB::JSONBuilder::ItemPtr)’ at /home/jakalletti/ClickHouse/ClickHouse/src/Common/JSONBuilder.h:97:68, - # inlined from ‘virtual void DB::ExpressionStep::describeActions(DB::JSONBuilder::JSONMap&) const’ at /home/jakalletti/ClickHouse/ClickHouse/src/Processors/QueryPlan/ExpressionStep.cpp:102:12: - # /home/jakalletti/ClickHouse/ClickHouse/contrib/libcxx/include/new:247:20: error: ‘void operator delete(void*, size_t)’ called on a pointer to an unallocated object ‘7598543875853023301’ [-Werror=free-nonheap-object] - add_cxx_compile_options(-Wno-error=free-nonheap-object) - # AggregateFunctionAvg.h:203:100: error: ‘this’ pointer is null [-Werror=nonnull] - add_cxx_compile_options(-Wno-error=nonnull) endif () diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 695d7ec3f92..0ff8b550a98 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -9,8 +9,6 @@ if (WITH_COVERAGE) # disable coverage for contib files and build with optimisations if (COMPILER_CLANG) add_compile_options(-O3 -DNDEBUG -finline-functions -finline-hint-functions ${WITHOUT_COVERAGE_LIST}) - else() - add_compile_options(-O3 -DNDEBUG -finline-functions ${WITHOUT_COVERAGE_LIST}) endif() endif() @@ -193,6 +191,8 @@ add_contrib (google-benchmark-cmake google-benchmark) add_contrib (ulid-c-cmake ulid-c) +add_contrib (isa-l-cmake isa-l) + # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs. # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear # in "contrib/..."
as originally planned, so we workaround this by fixing FOLDER properties of all targets manually, diff --git a/contrib/cctz b/contrib/cctz index 7c78edd52b4..5e05432420f 160000 --- a/contrib/cctz +++ b/contrib/cctz @@ -1 +1 @@ -Subproject commit 7c78edd52b4d65acc103c2f195818ffcabe6fe0d +Subproject commit 5e05432420f9692418e2e12aff09859e420b14a2 diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index b4cf0ad5e66..09ed2fe3f80 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -1,9 +1,4 @@ -# disable grpc due to conflicts of abseil (required by grpc) dynamic annotations with libtsan.a -if (SANITIZE STREQUAL "thread" AND COMPILER_GCC) - set(ENABLE_GRPC_DEFAULT OFF) -else() - set(ENABLE_GRPC_DEFAULT ${ENABLE_LIBRARIES}) -endif() +set(ENABLE_GRPC_DEFAULT ${ENABLE_LIBRARIES}) option(ENABLE_GRPC "Use gRPC" ${ENABLE_GRPC_DEFAULT}) if(NOT ENABLE_GRPC) diff --git a/contrib/isa-l b/contrib/isa-l new file mode 160000 index 00000000000..9f2b68f0575 --- /dev/null +++ b/contrib/isa-l @@ -0,0 +1 @@ +Subproject commit 9f2b68f05752097f0f16632fc4a9a86950831efd diff --git a/contrib/isa-l-cmake/CMakeLists.txt b/contrib/isa-l-cmake/CMakeLists.txt new file mode 100644 index 00000000000..fd0218a7b80 --- /dev/null +++ b/contrib/isa-l-cmake/CMakeLists.txt @@ -0,0 +1,186 @@ +set(ISAL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/isa-l") + +# check nasm compiler +include(CheckLanguage) +check_language(ASM_NASM) +if(NOT CMAKE_ASM_NASM_COMPILER) + message(FATAL_ERROR "Please install NASM from 'https://www.nasm.us/' because NASM compiler can not be found!") +endif() + +enable_language(ASM_NASM) + +set(ISAL_C_SRC + ${ISAL_SOURCE_DIR}/crc/crc_base_aliases.c + ${ISAL_SOURCE_DIR}/crc/crc_base.c + ${ISAL_SOURCE_DIR}/crc/crc64_base.c + ${ISAL_SOURCE_DIR}/erasure_code/ec_base.c + ${ISAL_SOURCE_DIR}/erasure_code/ec_base_aliases.c + ${ISAL_SOURCE_DIR}/erasure_code/ec_highlevel_func.c + ${ISAL_SOURCE_DIR}/erasure_code/gen_rs_matrix_limits.c + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_1tbl.c + ${ISAL_SOURCE_DIR}/igzip/adler32_base.c + ${ISAL_SOURCE_DIR}/igzip/encode_df.c + ${ISAL_SOURCE_DIR}/igzip/flatten_ll.c + ${ISAL_SOURCE_DIR}/igzip/generate_custom_hufftables.c + ${ISAL_SOURCE_DIR}/igzip/generate_static_inflate.c + ${ISAL_SOURCE_DIR}/igzip/huff_codes.c + ${ISAL_SOURCE_DIR}/igzip/hufftables_c.c + ${ISAL_SOURCE_DIR}/igzip/igzip_base_aliases.c + ${ISAL_SOURCE_DIR}/igzip/igzip_base.c + ${ISAL_SOURCE_DIR}/igzip/igzip_icf_base.c + ${ISAL_SOURCE_DIR}/igzip/igzip_icf_body.c + ${ISAL_SOURCE_DIR}/igzip/igzip_inflate.c + ${ISAL_SOURCE_DIR}/igzip/igzip.c + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_base_aliases.c + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_base.c + ${ISAL_SOURCE_DIR}/programs/igzip_cli.c + ${ISAL_SOURCE_DIR}/raid/raid_base_aliases.c + ${ISAL_SOURCE_DIR}/raid/raid_base.c +) + +set(ISAL_ASM_SRC + ${ISAL_SOURCE_DIR}/crc/crc_multibinary.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_01.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_02.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_by4.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_copy_by4_02.asm + ${ISAL_SOURCE_DIR}/crc/crc16_t10dif_copy_by4.asm + ${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by8_02.asm + ${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc32_gzip_refl_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc32_ieee_01.asm + ${ISAL_SOURCE_DIR}/crc/crc32_ieee_02.asm + ${ISAL_SOURCE_DIR}/crc/crc32_ieee_by4.asm + ${ISAL_SOURCE_DIR}/crc/crc32_ieee_by16_10.asm + 
${ISAL_SOURCE_DIR}/crc/crc32_iscsi_00.asm + ${ISAL_SOURCE_DIR}/crc/crc32_iscsi_01.asm + ${ISAL_SOURCE_DIR}/crc/crc32_iscsi_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_ecma_norm_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_ecma_norm_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_ecma_refl_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_ecma_refl_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_iso_norm_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_iso_norm_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_iso_refl_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_iso_refl_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_jones_norm_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_jones_norm_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_jones_refl_by8.asm + ${ISAL_SOURCE_DIR}/crc/crc64_jones_refl_by16_10.asm + ${ISAL_SOURCE_DIR}/crc/crc64_multibinary.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/ec_multibinary.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_2vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_3vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_4vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_5vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_6vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx2.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_dot_prod_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx2.asm + 
${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_avx512.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mad_sse.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mul_avx.asm + ${ISAL_SOURCE_DIR}/erasure_code/gf_vect_mul_sse.asm + ${ISAL_SOURCE_DIR}/igzip/adler32_avx2_4.asm + ${ISAL_SOURCE_DIR}/igzip/adler32_sse.asm + ${ISAL_SOURCE_DIR}/igzip/bitbuf2.asm + ${ISAL_SOURCE_DIR}/igzip/encode_df_04.asm + ${ISAL_SOURCE_DIR}/igzip/encode_df_06.asm + ${ISAL_SOURCE_DIR}/igzip/heap_macros.asm + ${ISAL_SOURCE_DIR}/igzip/huffman.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_body.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_compare_types.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_decode_block_stateless_01.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_decode_block_stateless_04.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_deflate_hash.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_finish.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_gen_icf_map_lh1_04.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_gen_icf_map_lh1_06.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_icf_body_h1_gr_bt.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_icf_finish.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_inflate_multibinary.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_multibinary.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_set_long_icf_fg_04.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_set_long_icf_fg_06.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_update_histogram_01.asm + ${ISAL_SOURCE_DIR}/igzip/igzip_update_histogram_04.asm + ${ISAL_SOURCE_DIR}/igzip/lz0a_const.asm + ${ISAL_SOURCE_DIR}/igzip/options.asm + ${ISAL_SOURCE_DIR}/igzip/proc_heap.asm + ${ISAL_SOURCE_DIR}/igzip/rfc1951_lookup.asm + ${ISAL_SOURCE_DIR}/igzip/stdmac.asm + ${ISAL_SOURCE_DIR}/mem/mem_multibinary.asm + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx.asm + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx2.asm + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_avx512.asm + ${ISAL_SOURCE_DIR}/mem/mem_zero_detect_sse.asm + ${ISAL_SOURCE_DIR}/raid/pq_check_sse.asm + ${ISAL_SOURCE_DIR}/raid/pq_gen_avx.asm + ${ISAL_SOURCE_DIR}/raid/pq_gen_avx2.asm + ${ISAL_SOURCE_DIR}/raid/pq_gen_avx512.asm + ${ISAL_SOURCE_DIR}/raid/pq_gen_sse.asm + ${ISAL_SOURCE_DIR}/raid/raid_multibinary.asm + ${ISAL_SOURCE_DIR}/raid/xor_check_sse.asm + ${ISAL_SOURCE_DIR}/raid/xor_gen_avx.asm + ${ISAL_SOURCE_DIR}/raid/xor_gen_avx512.asm + ${ISAL_SOURCE_DIR}/raid/xor_gen_sse.asm +) + +# Adding ISA-L library target +add_library(_isal ${ISAL_C_SRC} ${ISAL_ASM_SRC}) + +# Setting external and internal interfaces for ISA-L library +target_include_directories(_isal + PUBLIC ${ISAL_SOURCE_DIR}/include + PUBLIC ${ISAL_SOURCE_DIR}/igzip + PUBLIC ${ISAL_SOURCE_DIR}/crc + PUBLIC ${ISAL_SOURCE_DIR}/erasure_code) + +# Here must remove "-fno-sanitize=undefined" from COMPILE_OPTIONS. 
+# Otherwise nasm compiler would fail to proceed due to unrecognition of "-fno-sanitize=undefined" +if (SANITIZE STREQUAL "undefined") + get_target_property(target_options _isal COMPILE_OPTIONS) + list(REMOVE_ITEM target_options "-fno-sanitize=undefined") + set_property(TARGET _isal PROPERTY COMPILE_OPTIONS ${target_options}) +endif() + +add_library(ch_contrib::isal ALIAS _isal) diff --git a/contrib/krb5 b/contrib/krb5 index 9453aec0d50..b56ce6ba690 160000 --- a/contrib/krb5 +++ b/contrib/krb5 @@ -1 +1 @@ -Subproject commit 9453aec0d50e5aff9b189051611b321b40935d02 +Subproject commit b56ce6ba690e1f320df1a64afa34980c3e462617 diff --git a/contrib/krb5-cmake/CMakeLists.txt b/contrib/krb5-cmake/CMakeLists.txt index 93b90c15201..44058456ed4 100644 --- a/contrib/krb5-cmake/CMakeLists.txt +++ b/contrib/krb5-cmake/CMakeLists.txt @@ -15,10 +15,6 @@ if(NOT AWK_PROGRAM) message(FATAL_ERROR "You need the awk program to build ClickHouse with krb5 enabled.") endif() -if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)) - add_compile_definitions(USE_BORINGSSL=1) -endif () - set(KRB5_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/krb5/src") set(KRB5_ET_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/include_private") @@ -162,6 +158,11 @@ set(ALL_SRCS "${KRB5_SOURCE_DIR}/lib/crypto/builtin/kdf.c" "${KRB5_SOURCE_DIR}/lib/crypto/builtin/cmac.c" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/des/des_keys.c" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/des/f_parity.c" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/enc_provider/rc4.c" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/hash_provider/hash_md4.c" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/md4/md4.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/prng.c" "${KRB5_SOURCE_DIR}/lib/crypto/krb/enc_dk_cmac.c" # "${KRB5_SOURCE_DIR}/lib/crypto/krb/crc32.c" @@ -226,7 +227,6 @@ set(ALL_SRCS # "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/des.c" "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/rc4.c" "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/des3.c" - #"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/camellia.c" "${KRB5_SOURCE_DIR}/lib/crypto/openssl/cmac.c" "${KRB5_SOURCE_DIR}/lib/crypto/openssl/sha256.c" "${KRB5_SOURCE_DIR}/lib/crypto/openssl/hmac.c" @@ -474,6 +474,14 @@ set(ALL_SRCS "${KRB5_SOURCE_DIR}/lib/krb5/krb5_libinit.c" ) +if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)) + add_compile_definitions(USE_BORINGSSL=1) +else() + set(ALL_SRCS ${ALL_SRCS} + "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/camellia.c" + ) +endif() + add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/compile_et" COMMAND /bin/sh @@ -673,6 +681,7 @@ target_include_directories(_krb5 PRIVATE "${KRB5_SOURCE_DIR}/lib/gssapi/krb5" "${KRB5_SOURCE_DIR}/lib/gssapi/spnego" "${KRB5_SOURCE_DIR}/util/et" + "${KRB5_SOURCE_DIR}/lib/crypto/builtin/md4" "${KRB5_SOURCE_DIR}/lib/crypto/openssl" "${KRB5_SOURCE_DIR}/lib/crypto/krb" "${KRB5_SOURCE_DIR}/util/profile" diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index 21ed76f8b6f..a13e4f0f60a 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -69,11 +69,6 @@ if (USE_MUSL) target_compile_definitions(cxx PUBLIC -D_LIBCPP_HAS_MUSL_LIBC=1) endif () -# Override the deduced attribute support that causes error. -if (OS_DARWIN AND COMPILER_GCC) - add_compile_definitions(_LIBCPP_INIT_PRIORITY_MAX) -endif () - target_compile_options(cxx PUBLIC $<$<COMPILE_LANGUAGE:CXX>:-nostdinc++>) # Third party library may have substandard code.
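# Illustration (not part of the patch): the _isal block earlier in this patch strips
# "-fno-sanitize=undefined" from the target because NASM cannot parse that flag. A reusable
# sketch of the same pattern, with a hypothetical helper name, could look like this:
function(remove_compile_option target option)
    # COMPILE_OPTIONS may be unset, in which case get_target_property() yields a -NOTFOUND value
    get_target_property(options ${target} COMPILE_OPTIONS)
    if(options)
        list(REMOVE_ITEM options "${option}")
        set_property(TARGET ${target} PROPERTY COMPILE_OPTIONS ${options})
    endif()
endfunction()
# e.g. remove_compile_option(_isal "-fno-sanitize=undefined") would reproduce the block above.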
@@ -84,11 +79,6 @@ target_compile_definitions(cxx PUBLIC -D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS target_link_libraries(cxx PUBLIC cxxabi) -# For __udivmodti4, __divmodti4. -if (OS_DARWIN AND COMPILER_GCC) - target_link_libraries(cxx PRIVATE gcc) -endif () - install( TARGETS cxx EXPORT global diff --git a/contrib/libhdfs3 b/contrib/libhdfs3 index 9ee3ce77215..164b89253fa 160000 --- a/contrib/libhdfs3 +++ b/contrib/libhdfs3 @@ -1 +1 @@ -Subproject commit 9ee3ce77215fca83b7fdfcfe2186a3db0d0bdb74 +Subproject commit 164b89253fad7991bce77882f01b51ab81d19f3d diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt index c22cac731fe..d9f7009c1bd 100644 --- a/contrib/libhdfs3-cmake/CMakeLists.txt +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -70,6 +70,30 @@ set(SRCS "${HDFS3_SOURCE_DIR}/client/Token.cpp" "${HDFS3_SOURCE_DIR}/client/PacketPool.cpp" "${HDFS3_SOURCE_DIR}/client/OutputStream.cpp" + "${HDFS3_SOURCE_DIR}/client/AbstractNativeRawDecoder.cpp" + "${HDFS3_SOURCE_DIR}/client/AbstractNativeRawEncoder.cpp" + "${HDFS3_SOURCE_DIR}/client/ByteBufferDecodingState.cpp" + "${HDFS3_SOURCE_DIR}/client/ByteBufferEncodingState.cpp" + "${HDFS3_SOURCE_DIR}/client/CoderUtil.cpp" + "${HDFS3_SOURCE_DIR}/client/ECChunk.cpp" + "${HDFS3_SOURCE_DIR}/client/ErasureCoderOptions.cpp" + "${HDFS3_SOURCE_DIR}/client/GF256.cpp" + "${HDFS3_SOURCE_DIR}/client/GaloisField.cpp" + "${HDFS3_SOURCE_DIR}/client/NativeRSRawDecoder.cpp" + "${HDFS3_SOURCE_DIR}/client/NativeRSRawEncoder.cpp" + "${HDFS3_SOURCE_DIR}/client/Preconditions.cpp" + "${HDFS3_SOURCE_DIR}/client/RSUtil.cpp" + "${HDFS3_SOURCE_DIR}/client/RawErasureCoderFactory.cpp" + "${HDFS3_SOURCE_DIR}/client/RawErasureDecoder.cpp" + "${HDFS3_SOURCE_DIR}/client/RawErasureEncoder.cpp" + "${HDFS3_SOURCE_DIR}/client/StatefulStripeReader.cpp" + "${HDFS3_SOURCE_DIR}/client/StripeReader.cpp" + "${HDFS3_SOURCE_DIR}/client/StripedBlockUtil.cpp" + "${HDFS3_SOURCE_DIR}/client/StripedInputStreamImpl.cpp" + "${HDFS3_SOURCE_DIR}/client/StripedOutputStreamImpl.cpp" + "${HDFS3_SOURCE_DIR}/client/SystemECPolicies.cpp" + "${HDFS3_SOURCE_DIR}/client/dump.cpp" + "${HDFS3_SOURCE_DIR}/client/erasure_coder.cpp" "${HDFS3_SOURCE_DIR}/rpc/RpcChannelKey.cpp" "${HDFS3_SOURCE_DIR}/rpc/RpcProtocolInfo.cpp" "${HDFS3_SOURCE_DIR}/rpc/RpcClient.cpp" @@ -148,6 +172,9 @@ if (TARGET OpenSSL::SSL) target_link_libraries(_hdfs3 PRIVATE OpenSSL::Crypto OpenSSL::SSL) endif() +target_link_libraries(_hdfs3 PRIVATE ch_contrib::isal) +add_definitions(-DHADOOP_ISAL_LIBRARY) + add_library(ch_contrib::hdfs ALIAS _hdfs3) if (ENABLE_CLICKHOUSE_BENCHMARK) diff --git a/contrib/libuv-cmake/CMakeLists.txt b/contrib/libuv-cmake/CMakeLists.txt index 7ca2cef2251..fb88799ed38 100644 --- a/contrib/libuv-cmake/CMakeLists.txt +++ b/contrib/libuv-cmake/CMakeLists.txt @@ -1,9 +1,3 @@ -# once fixed, please remove similar places in CMakeLists of libuv users (search "ch_contrib::uv") -if (OS_DARWIN AND COMPILER_GCC) - message (WARNING "libuv cannot be built with GCC in macOS due to a bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93082") - return() -endif() - # This file is a modified version of contrib/libuv/CMakeLists.txt set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/libuv") diff --git a/contrib/llvm-project b/contrib/llvm-project index e0accd51793..2aedf7598a4 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit e0accd517933ebb44aff84bc8db448ffd8ef1929 +Subproject commit 2aedf7598a4040b23881dbe05b6afaca25a337ef diff --git a/contrib/qpl b/contrib/qpl index 
d75a29d95d8..0bce2b03423 160000 --- a/contrib/qpl +++ b/contrib/qpl @@ -1 +1 @@ -Subproject commit d75a29d95d8a548297fce3549d21020005364dc8 +Subproject commit 0bce2b03423f6fbeb8bce66cc8be0bf558058848 diff --git a/contrib/qpl-cmake/CMakeLists.txt b/contrib/qpl-cmake/CMakeLists.txt index fc5548b0652..334731d105f 100644 --- a/contrib/qpl-cmake/CMakeLists.txt +++ b/contrib/qpl-cmake/CMakeLists.txt @@ -40,9 +40,10 @@ set (LOG_HW_INIT OFF) set (SANITIZE_MEMORY OFF) set (SANITIZE_THREADS OFF) set (LIB_FUZZING_ENGINE OFF) +set (DYNAMIC_LOADING_LIBACCEL_CONFIG OFF) function(GetLibraryVersion _content _outputVar) - string(REGEX MATCHALL "Qpl VERSION (.+) LANGUAGES" VERSION_REGEX "${_content}") + string(REGEX MATCHALL "QPL VERSION (.+) LANGUAGES" VERSION_REGEX "${_content}") SET(${_outputVar} ${CMAKE_MATCH_1} PARENT_SCOPE) endfunction() @@ -240,7 +241,9 @@ add_library(core_iaa OBJECT ${HW_PATH_SRC}) target_include_directories(core_iaa PRIVATE ${UUID_DIR} PUBLIC $ - PRIVATE $ + PUBLIC $ + PRIVATE $ # status.h in own_checkers.h + PRIVATE $ # own_checkers.h PRIVATE $) target_compile_options(core_iaa @@ -339,4 +342,7 @@ target_link_libraries(_qpl PRIVATE ${CMAKE_DL_LIBS}) add_library (ch_contrib::qpl ALIAS _qpl) -target_include_directories(_qpl SYSTEM BEFORE PUBLIC "${QPL_PROJECT_DIR}/include") +target_include_directories(_qpl SYSTEM BEFORE + PUBLIC "${QPL_PROJECT_DIR}/include" + PUBLIC "${LIBACCEL_SOURCE_DIR}/accfg" + PUBLIC ${UUID_DIR}) diff --git a/contrib/sparse-checkout/setup-sparse-checkout.sh b/contrib/sparse-checkout/setup-sparse-checkout.sh new file mode 100755 index 00000000000..3feba6c5adf --- /dev/null +++ b/contrib/sparse-checkout/setup-sparse-checkout.sh @@ -0,0 +1,19 @@ +#!/bin/sh + +set -e + +git config submodule."contrib/llvm-project".update '!../sparse-checkout/update-llvm-project.sh' +git config submodule."contrib/croaring".update '!../sparse-checkout/update-croaring.sh' +git config submodule."contrib/aws".update '!../sparse-checkout/update-aws.sh' +git config submodule."contrib/openssl".update '!../sparse-checkout/update-openssl.sh' +git config submodule."contrib/boringssl".update '!../sparse-checkout/update-boringssl.sh' +git config submodule."contrib/arrow".update '!../sparse-checkout/update-arrow.sh' +git config submodule."contrib/grpc".update '!../sparse-checkout/update-grpc.sh' +git config submodule."contrib/orc".update '!../sparse-checkout/update-orc.sh' +git config submodule."contrib/h3".update '!../sparse-checkout/update-h3.sh' +git config submodule."contrib/icu".update '!../sparse-checkout/update-icu.sh' +git config submodule."contrib/boost".update '!../sparse-checkout/update-boost.sh' +git config submodule."contrib/aws-s2n-tls".update '!../sparse-checkout/update-aws-s2n-tls.sh' +git config submodule."contrib/protobuf".update '!../sparse-checkout/update-protobuf.sh' +git config submodule."contrib/libxml2".update '!../sparse-checkout/update-libxml2.sh' +git config submodule."contrib/brotli".update '!../sparse-checkout/update-brotli.sh' diff --git a/contrib/sparse-checkout/update-arrow.sh b/contrib/sparse-checkout/update-arrow.sh new file mode 100755 index 00000000000..e004b60da02 --- /dev/null +++ b/contrib/sparse-checkout/update-arrow.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +echo "Using sparse checkout for arrow" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/*/*' >> $FILES_TO_CHECKOUT +echo '/cpp/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git 
a/contrib/sparse-checkout/update-aws-s2n-tls.sh b/contrib/sparse-checkout/update-aws-s2n-tls.sh new file mode 100755 index 00000000000..4d65dc4b81d --- /dev/null +++ b/contrib/sparse-checkout/update-aws-s2n-tls.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +echo "Using sparse checkout for aws-s2n-tls" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/test/*' >> $FILES_TO_CHECKOUT +echo '!/docs/*' >> $FILES_TO_CHECKOUT +echo '!/compliance/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-aws.sh b/contrib/sparse-checkout/update-aws.sh new file mode 100755 index 00000000000..c8d4c5a89c2 --- /dev/null +++ b/contrib/sparse-checkout/update-aws.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +echo "Using sparse checkout for aws" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/*/*' >> $FILES_TO_CHECKOUT +echo '/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT +echo '/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-boost.sh b/contrib/sparse-checkout/update-boost.sh new file mode 100755 index 00000000000..9bd1f6c1796 --- /dev/null +++ b/contrib/sparse-checkout/update-boost.sh @@ -0,0 +1,85 @@ +#!/bin/sh + +echo "Using sparse checkout for boost" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/*/*' >> $FILES_TO_CHECKOUT +echo '/boost/*' > $FILES_TO_CHECKOUT +echo '!/boost/*/*' >> $FILES_TO_CHECKOUT +echo '/boost/algorithm/*' >> $FILES_TO_CHECKOUT +echo '/boost/any/*' >> $FILES_TO_CHECKOUT +echo '/boost/atomic/*' >> $FILES_TO_CHECKOUT +echo '/boost/assert/*' >> $FILES_TO_CHECKOUT +echo '/boost/bind/*' >> $FILES_TO_CHECKOUT +echo '/boost/concept/*' >> $FILES_TO_CHECKOUT +echo '/boost/config/*' >> $FILES_TO_CHECKOUT +echo '/boost/container/*' >> $FILES_TO_CHECKOUT +echo '/boost/container_hash/*' >> $FILES_TO_CHECKOUT +echo '/boost/context/*' >> $FILES_TO_CHECKOUT +echo '/boost/convert/*' >> $FILES_TO_CHECKOUT +echo '/boost/coroutine/*' >> $FILES_TO_CHECKOUT +echo '/boost/core/*' >> $FILES_TO_CHECKOUT +echo '/boost/detail/*' >> $FILES_TO_CHECKOUT +echo '/boost/dynamic_bitset/*' >> $FILES_TO_CHECKOUT +echo '/boost/exception/*' >> $FILES_TO_CHECKOUT +echo '/boost/filesystem/*' >> $FILES_TO_CHECKOUT +echo '/boost/functional/*' >> $FILES_TO_CHECKOUT +echo '/boost/function/*' >> $FILES_TO_CHECKOUT +echo '/boost/geometry/*' >> $FILES_TO_CHECKOUT +echo '/boost/graph/*' >> $FILES_TO_CHECKOUT +echo '/boost/heap/*' >> $FILES_TO_CHECKOUT +echo '/boost/integer/*' >> $FILES_TO_CHECKOUT +echo '/boost/intrusive/*' >> $FILES_TO_CHECKOUT +echo '/boost/iostreams/*' >> $FILES_TO_CHECKOUT +echo '/boost/io/*' >> $FILES_TO_CHECKOUT +echo '/boost/iterator/*' >> $FILES_TO_CHECKOUT +echo '/boost/math/*' >> $FILES_TO_CHECKOUT +echo '/boost/move/*' >> $FILES_TO_CHECKOUT +echo '/boost/mpl/*' >> $FILES_TO_CHECKOUT +echo '/boost/multi_index/*' >> $FILES_TO_CHECKOUT +echo '/boost/multiprecision/*' >> $FILES_TO_CHECKOUT +echo '/boost/numeric/*' >> $FILES_TO_CHECKOUT +echo '/boost/predef/*' >> $FILES_TO_CHECKOUT +echo '/boost/preprocessor/*' >> $FILES_TO_CHECKOUT +echo '/boost/program_options/*' >> $FILES_TO_CHECKOUT +echo '/boost/range/*' >> $FILES_TO_CHECKOUT +echo '/boost/regex/*' >> $FILES_TO_CHECKOUT +echo '/boost/smart_ptr/*' >> $FILES_TO_CHECKOUT +echo '/boost/type_index/*' 
>> $FILES_TO_CHECKOUT +echo '/boost/type_traits/*' >> $FILES_TO_CHECKOUT +echo '/boost/system/*' >> $FILES_TO_CHECKOUT +echo '/boost/tti/*' >> $FILES_TO_CHECKOUT +echo '/boost/utility/*' >> $FILES_TO_CHECKOUT +echo '/boost/lexical_cast/*' >> $FILES_TO_CHECKOUT +echo '/boost/optional/*' >> $FILES_TO_CHECKOUT +echo '/boost/property_map/*' >> $FILES_TO_CHECKOUT +echo '/boost/pending/*' >> $FILES_TO_CHECKOUT +echo '/boost/multi_array/*' >> $FILES_TO_CHECKOUT +echo '/boost/tuple/*' >> $FILES_TO_CHECKOUT +echo '/boost/icl/*' >> $FILES_TO_CHECKOUT +echo '/boost/unordered/*' >> $FILES_TO_CHECKOUT +echo '/boost/typeof/*' >> $FILES_TO_CHECKOUT +echo '/boost/parameter/*' >> $FILES_TO_CHECKOUT +echo '/boost/mp11/*' >> $FILES_TO_CHECKOUT +echo '/boost/archive/*' >> $FILES_TO_CHECKOUT +echo '/boost/function_types/*' >> $FILES_TO_CHECKOUT +echo '/boost/serialization/*' >> $FILES_TO_CHECKOUT +echo '/boost/fusion/*' >> $FILES_TO_CHECKOUT +echo '/boost/variant/*' >> $FILES_TO_CHECKOUT +echo '/boost/format/*' >> $FILES_TO_CHECKOUT +echo '/boost/locale/*' >> $FILES_TO_CHECKOUT +echo '/boost/random/*' >> $FILES_TO_CHECKOUT +echo '/boost/spirit/*' >> $FILES_TO_CHECKOUT +echo '/boost/uuid/*' >> $FILES_TO_CHECKOUT +echo '/boost/xpressive/*' >> $FILES_TO_CHECKOUT +echo '/boost/asio/*' >> $FILES_TO_CHECKOUT +echo '/boost/circular_buffer/*' >> $FILES_TO_CHECKOUT +echo '/boost/proto/*' >> $FILES_TO_CHECKOUT +echo '/boost/qvm/*' >> $FILES_TO_CHECKOUT +echo '/boost/property_tree/*' >> $FILES_TO_CHECKOUT +echo '/libs/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD \ No newline at end of file diff --git a/contrib/sparse-checkout/update-boringssl.sh b/contrib/sparse-checkout/update-boringssl.sh new file mode 100755 index 00000000000..f877a78afed --- /dev/null +++ b/contrib/sparse-checkout/update-boringssl.sh @@ -0,0 +1,14 @@ +#!/bin/sh + +echo "Using sparse checkout for boringsll" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/fuzz/*' >> $FILES_TO_CHECKOUT +echo '!/crypto/cipher_extra/test/*' >> $FILES_TO_CHECKOUT +echo '!/third_party/wycheproof_testvectors/*' >> $FILES_TO_CHECKOUT +echo '!/third_party/googletest/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-brotli.sh b/contrib/sparse-checkout/update-brotli.sh new file mode 100755 index 00000000000..8784f5e4125 --- /dev/null +++ b/contrib/sparse-checkout/update-brotli.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +echo "Using sparse checkout for brotli" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/*/*' >> $FILES_TO_CHECKOUT +echo '/c/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-croaring.sh b/contrib/sparse-checkout/update-croaring.sh new file mode 100755 index 00000000000..9b7bba19df4 --- /dev/null +++ b/contrib/sparse-checkout/update-croaring.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +echo "Using sparse checkout for croaring" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/benchmarks/*' >> $FILES_TO_CHECKOUT +echo '!/tests/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-grpc.sh b/contrib/sparse-checkout/update-grpc.sh new file mode 100755 index 
00000000000..38934fdbc1b --- /dev/null +++ b/contrib/sparse-checkout/update-grpc.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +echo "Using sparse checkout for grpc" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/test/*' >> $FILES_TO_CHECKOUT +echo '/test/build/*' >> $FILES_TO_CHECKOUT +echo '!/tools/*' >> $FILES_TO_CHECKOUT +echo '/tools/codegen/*' >> $FILES_TO_CHECKOUT +echo '!/examples/*' >> $FILES_TO_CHECKOUT +echo '!/doc/*' >> $FILES_TO_CHECKOUT +# FIXME why do we need csharp? +#echo '!/src/csharp/*' >> $FILES_TO_CHECKOUT +echo '!/src/python/*' >> $FILES_TO_CHECKOUT +echo '!/src/objective-c/*' >> $FILES_TO_CHECKOUT +echo '!/src/php/*' >> $FILES_TO_CHECKOUT +echo '!/src/ruby/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-h3.sh b/contrib/sparse-checkout/update-h3.sh new file mode 100755 index 00000000000..127885f89cc --- /dev/null +++ b/contrib/sparse-checkout/update-h3.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +echo "Using sparse checkout for h3" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/tests/*' >> $FILES_TO_CHECKOUT +echo '!/website/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-icu.sh b/contrib/sparse-checkout/update-icu.sh new file mode 100755 index 00000000000..76af39f07a4 --- /dev/null +++ b/contrib/sparse-checkout/update-icu.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +echo "Using sparse checkout for icu" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/*/*' >> $FILES_TO_CHECKOUT +echo '/icu4c/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD \ No newline at end of file diff --git a/contrib/sparse-checkout/update-libxml2.sh b/contrib/sparse-checkout/update-libxml2.sh new file mode 100755 index 00000000000..24faf11eec9 --- /dev/null +++ b/contrib/sparse-checkout/update-libxml2.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +echo "Using sparse checkout for libxml2" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/result/*' >> $FILES_TO_CHECKOUT +echo '!/test/*' >> $FILES_TO_CHECKOUT +echo '!/doc/*' >> $FILES_TO_CHECKOUT +echo '!/os400/*' >> $FILES_TO_CHECKOUT +echo '!/fuzz/*' >> $FILES_TO_CHECKOUT +echo '!/python/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-llvm-project.sh b/contrib/sparse-checkout/update-llvm-project.sh new file mode 100755 index 00000000000..53c3b691d3a --- /dev/null +++ b/contrib/sparse-checkout/update-llvm-project.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +echo "Using sparse checkout for llvm-project" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/*/*' >> $FILES_TO_CHECKOUT +echo '/llvm/*' >> $FILES_TO_CHECKOUT +echo '!/llvm/*/*' >> $FILES_TO_CHECKOUT +echo '/llvm/cmake/*' >> $FILES_TO_CHECKOUT +echo '/llvm/projects/*' >> $FILES_TO_CHECKOUT +echo '/llvm/include/*' >> $FILES_TO_CHECKOUT +echo '/llvm/lib/*' >> $FILES_TO_CHECKOUT +echo '/llvm/utils/TableGen/*' >> $FILES_TO_CHECKOUT +echo '/libcxxabi/*' >> $FILES_TO_CHECKOUT +echo '!/libcxxabi/test/*' >> $FILES_TO_CHECKOUT +echo '/libcxx/*' >> $FILES_TO_CHECKOUT +echo '!/libcxx/test/*' >> 
$FILES_TO_CHECKOUT +echo '/libunwind/*' >> $FILES_TO_CHECKOUT +echo '!/libunwind/test/*' >> $FILES_TO_CHECKOUT +echo '/compiler-rt/*' >> $FILES_TO_CHECKOUT +echo '!/compiler-rt/test/*' >> $FILES_TO_CHECKOUT +echo '/cmake/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-openssl.sh b/contrib/sparse-checkout/update-openssl.sh new file mode 100755 index 00000000000..33e19f43cb7 --- /dev/null +++ b/contrib/sparse-checkout/update-openssl.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +echo "Using sparse checkout for openssl" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/fuzz/*' >> $FILES_TO_CHECKOUT +echo '!/test/*' >> $FILES_TO_CHECKOUT +echo '!/doc/*' >> $FILES_TO_CHECKOUT +echo '!/providers/*' >> $FILES_TO_CHECKOUT +echo '!/apps/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-orc.sh b/contrib/sparse-checkout/update-orc.sh new file mode 100755 index 00000000000..57ab57a8d52 --- /dev/null +++ b/contrib/sparse-checkout/update-orc.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +echo "Using sparse checkout for orc" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '/*' > $FILES_TO_CHECKOUT +echo '!/*/*' >> $FILES_TO_CHECKOUT +echo '/c++/*' >> $FILES_TO_CHECKOUT +echo '/proto/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sparse-checkout/update-protobuf.sh b/contrib/sparse-checkout/update-protobuf.sh new file mode 100755 index 00000000000..31c037c2cf5 --- /dev/null +++ b/contrib/sparse-checkout/update-protobuf.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +echo "Using sparse checkout for protobuf" + +FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout +echo '!/*' > $FILES_TO_CHECKOUT +echo '/*/*' >> $FILES_TO_CHECKOUT +echo '/src/*' >> $FILES_TO_CHECKOUT +echo '/cmake/*' >> $FILES_TO_CHECKOUT + +git config core.sparsecheckout true +git checkout $1 +git read-tree -mu HEAD diff --git a/contrib/sysroot b/contrib/sysroot index f0081b2649b..e0d1b64da66 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit f0081b2649b94837855f3bc7d05ef326b100bad8 +Subproject commit e0d1b64da666afbfaa6f1ee0487c33f3fd2cd5cb diff --git a/contrib/update-submodules.sh b/contrib/update-submodules.sh new file mode 100755 index 00000000000..c94681e6240 --- /dev/null +++ b/contrib/update-submodules.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +set -e + +WORKDIR=$(dirname "$0") +WORKDIR=$(readlink -f "${WORKDIR}") + +"$WORKDIR/sparse-checkout/setup-sparse-checkout.sh" +git submodule init +git submodule sync +git submodule update --depth=1 diff --git a/contrib/zstd b/contrib/zstd index 945f27758c0..63779c79823 160000 --- a/contrib/zstd +++ b/contrib/zstd @@ -1 +1 @@ -Subproject commit 945f27758c0fd67b636103a38dbf050266c6b90a +Subproject commit 63779c798237346c2b245c546c40b72a5a5913fe diff --git a/docker/docs/builder/Dockerfile b/docker/docs/builder/Dockerfile index 5946687dbef..3ca2bdafcb3 100644 --- a/docker/docs/builder/Dockerfile +++ b/docker/docs/builder/Dockerfile @@ -1,4 +1,3 @@ -# rebuild in #36968 # docker build -t clickhouse/docs-builder . 
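# Usage sketch (added for illustration; not part of the image or the patch): the sparse-checkout
# machinery introduced above is driven entirely by contrib/update-submodules.sh, e.g. from a
# fresh clone of the upstream repository:
#   git clone https://github.com/ClickHouse/ClickHouse.git && cd ClickHouse
#   ./contrib/update-submodules.sh   # wires the update-*.sh hooks via setup-sparse-checkout.sh,
#                                    # then fetches submodules shallowly (git submodule update --depth=1)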
# nodejs 17 prefers ipv6 and is broken in our environment FROM node:16-alpine diff --git a/docker/images.json b/docker/images.json index 508138d79af..9150abe1f1c 100644 --- a/docker/images.json +++ b/docker/images.json @@ -151,5 +151,9 @@ "name": "clickhouse/docs-builder", "dependent": [ ] + }, + "docker/test/sqllogic": { + "name": "clickhouse/sqllogic-test", + "dependent": [] } } diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 37b58758b9e..59e8d2ed3d8 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.3.1.2823" +ARG VERSION="23.4.1.1943" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 822aa752655..d59a08c2805 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.3.1.2823" +ARG VERSION="23.4.1.1943" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index ec7e164e51f..390f347d549 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -22,7 +22,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.3.1.2823" +ARG VERSION="23.4.1.1943" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index c6c9fbca421..f6836804454 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -18,13 +18,13 @@ RUN apt-get update \ # and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB). # TSAN will flush shadow memory when reaching this limit. # It may cause false-negatives, but it's better than OOM. 
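# Note: second_deadlock_stack=1 (added to TSAN_OPTIONS below) makes TSAN remember and
# report the second stack involved in lock-order-inversion (potential deadlock) reports,
# which makes those reports much easier to act on.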
-RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080'" >> /etc/environment +RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment # Sanitizer options for current shell (not current, but the one that will be spawned on "docker run") # (but w/o verbosity for TSAN, otherwise test.reference will not match) -ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080' +ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index e0e30a63bb4..3ed0c4df093 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -194,7 +194,12 @@ function build { ( cd "$FASTTEST_BUILD" - time ninja clickhouse-bundle 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt" + TIMEFORMAT=$'\nreal\t%3R\nuser\t%3U\nsys\t%3S' + ( time ninja clickhouse-bundle) |& ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/build_log.txt" + BUILD_SECONDS_ELAPSED=$(awk '/^....-..-.. ..:..:.. real\t[0-9]/ {print $4}' < "$FASTTEST_OUTPUT/build_log.txt") + echo "build_clickhouse_fasttest_binary: [ OK ] $BUILD_SECONDS_ELAPSED sec." \ + | ts '%Y-%m-%d %H:%M:%S' \ + | tee "$FASTTEST_OUTPUT/test_result.txt" if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse" @@ -251,7 +256,7 @@ function run_tests ) time clickhouse-test "${test_opts[@]}" -- "$FASTTEST_FOCUS" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ - | tee "$FASTTEST_OUTPUT/test_result.txt" + | tee -a "$FASTTEST_OUTPUT/test_result.txt" set -e clickhouse stop --pid-path "$FASTTEST_DATA" diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index ce5bae2a031..14c97e479f6 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -32,6 +32,7 @@ RUN apt-get update \ libssl-dev \ libcurl4-openssl-dev \ gdb \ + default-jdk \ software-properties-common \ libkrb5-dev \ krb5-user \ @@ -46,10 +47,9 @@ ENV TZ=Etc/UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV DOCKER_CHANNEL stable -RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - -RUN add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}" - -RUN apt-get update \ +RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ + && add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}" \ + && apt-get update \ && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ docker-ce \ && rm -rf \ @@ -60,7 +60,7 @@ RUN apt-get update \ RUN dockerd --version; docker --version -RUN python3 -m pip install \ +RUN python3 -m pip install --no-cache-dir \ PyMySQL \ aerospike==4.0.0 \ avro==1.10.2 \ @@ -92,15 +92,25 @@ RUN python3 -m pip install \ tzlocal==2.1 \ urllib3 \ requests-kerberos \ + pyspark==3.3.2 \ + delta-spark==2.2.0 \ pyhdfs \ azure-storage-blob \ - meilisearch==0.18.3 + meilisearch==0.18.3 COPY modprobe.sh 
/usr/local/bin/modprobe COPY dockerd-entrypoint.sh /usr/local/bin/ COPY compose/ /compose/ COPY misc/ /misc/ +RUN curl -fsSL -O https://dlcdn.apache.org/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ + && tar xzvf spark-3.3.2-bin-hadoop3.tgz -C / \ + && rm spark-3.3.2-bin-hadoop3.tgz + +# download spark and packages +# if you change packages, don't forget to update them in tests/integration/helpers/cluster.py +RUN echo ":quit" | /spark-3.3.2-bin-hadoop3/bin/spark-shell --packages "org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,io.delta:delta-core_2.12:2.2.0,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0" > /dev/null + RUN set -x \ && addgroup --system dockremap \ && adduser --system dockremap \ @@ -108,6 +118,12 @@ RUN set -x \ && echo 'dockremap:165536:65536' >> /etc/subuid \ && echo 'dockremap:165536:65536' >> /etc/subgid +# Same options as in test/base/Dockerfile +# (in case you need to override them in tests) +ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' +ENV UBSAN_OPTIONS='print_stacktrace=1' +ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' + EXPOSE 2375 ENTRYPOINT ["dockerd-entrypoint.sh"] CMD ["sh", "-c", "pytest $PYTEST_OPTS"] diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index c16b2bf1087..fe47fc90951 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -16,7 +16,9 @@ echo '{ # and on hung you can simply press Ctrl-C and it will spawn a python pdb, # but on SIGINT dockerd will exit, so ignore it to preserve the daemon. trap '' INT -dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log & +# Binding to an IP address without --tlsverify is deprecated. Startup is intentionally being slowed +# unless --tls=false or --tlsverify=false is set +dockerd --host=unix:///var/run/docker.sock --tls=false --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log & set +e reties=0 @@ -37,6 +39,12 @@ set -e docker ps --all --quiet | xargs --no-run-if-empty docker rm || true } +java_path="$(update-alternatives --config java | sed -n 's/.*(providing \/usr\/bin\/java): //p')" +export JAVA_PATH=$java_path +export SPARK_HOME="/spark-3.3.2-bin-hadoop3" +export PATH=$SPARK_HOME/bin:$PATH +export JAVA_TOOL_OPTIONS="-Djdk.attach.allowAttachSelf=true" + echo "Start tests" export CLICKHOUSE_TESTS_SERVER_BIN_PATH=/clickhouse export CLICKHOUSE_TESTS_CLIENT_BIN_PATH=/clickhouse diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile new file mode 100644 index 00000000000..83dcf7e1f56 --- /dev/null +++ b/docker/test/sqllogic/Dockerfile @@ -0,0 +1,45 @@ +# docker build -t clickhouse/sqllogic-test . 
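+# A rough local usage sketch (the mounted paths mirror what run.sh below expects;
+# CI may wire them differently):
+#   docker build -t clickhouse/sqllogic-test docker/test/sqllogic
+#   docker run --rm -v "$PWD/tests":/clickhouse-tests \
+#     -v /path/to/debs:/package_folder -v /tmp/out:/test_output clickhouse/sqllogic-test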
+ARG FROM_TAG=latest
+FROM clickhouse/test-base:$FROM_TAG
+
+RUN apt-get update --yes \
+    && env DEBIAN_FRONTEND=noninteractive \
+        apt-get install --yes --no-install-recommends \
+            wget \
+            git \
+            python3 \
+            python3-dev \
+            python3-pip \
+            sqlite3 \
+            unixodbc \
+            unixodbc-dev \
+            sudo \
+    && apt-get clean
+
+RUN pip3 install \
+    numpy \
+    pyodbc \
+    deepdiff
+
+ARG odbc_repo="https://github.com/ClickHouse/clickhouse-odbc.git"
+
+RUN git clone --recursive ${odbc_repo} \
+    && mkdir -p /clickhouse-odbc/build \
+    && cmake -S /clickhouse-odbc -B /clickhouse-odbc/build \
+    && ls /clickhouse-odbc/build/driver \
+    && make -j 10 -C /clickhouse-odbc/build \
+    && ls /clickhouse-odbc/build/driver \
+    && mkdir -p /usr/local/lib64/ && cp /clickhouse-odbc/build/driver/lib*.so /usr/local/lib64/ \
+    && odbcinst -i -d -f /clickhouse-odbc/packaging/odbcinst.ini.sample \
+    && odbcinst -i -s -l -f /clickhouse-odbc/packaging/odbc.ini.sample
+
+ENV TZ=Europe/Amsterdam
+ENV MAX_RUN_TIME=900
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git"
+
+RUN git clone --recursive ${sqllogic_test_repo}
+
+COPY run.sh /
+CMD ["/bin/bash", "/run.sh"]
diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh
new file mode 100755
index 00000000000..8d0252e3c98
--- /dev/null
+++ b/docker/test/sqllogic/run.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+set -exu
+trap "exit" INT TERM
+
+echo "ENV"
+env
+
+# fail on errors, verbose and export all env variables
+set -e -x -a
+
+echo "Current directory"
+pwd
+echo "Files in current directory"
+ls -la ./
+echo "Files in root directory"
+ls -la /
+echo "Files in /clickhouse-tests directory"
+ls -la /clickhouse-tests
+echo "Files in /clickhouse-tests/sqllogic directory"
+ls -la /clickhouse-tests/sqllogic
+echo "Files in /package_folder directory"
+ls -la /package_folder
+echo "Files in /test_output"
+ls -la /test_output
+echo "Files in /sqllogictest"
+ls -la /sqllogictest
+
+dpkg -i package_folder/clickhouse-common-static_*.deb
+dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
+dpkg -i package_folder/clickhouse-server_*.deb
+dpkg -i package_folder/clickhouse-client_*.deb
+
+# install test configs
+# /clickhouse-tests/config/install.sh
+
+sudo clickhouse start
+
+sleep 5
+for _ in $(seq 1 60); do if [[ $(wget --timeout=1 -q 'localhost:8123' -O-) == 'Ok.'
]]; then break ; else sleep 1; fi ; done + +function run_tests() +{ + set -x + + cd /test_output + + /clickhouse-tests/sqllogic/runner.py --help 2>&1 \ + | ts '%Y-%m-%d %H:%M:%S' + + mkdir -p /test_output/self-test + /clickhouse-tests/sqllogic/runner.py --log-file /test_output/runner-self-test.log \ + self-test \ + --self-test-dir /clickhouse-tests/sqllogic/self-test \ + --out-dir /test_output/self-test \ + 2>&1 \ + | ts '%Y-%m-%d %H:%M:%S' + + cat /test_output/self-test/check_status.tsv >> /test_output/check_status.tsv + cat /test_output/self-test/test_results.tsv >> /test_output/test_results.tsv ||: + tar -zcvf self-test.tar.gz self-test 1>/dev/null + + if [ -d /sqllogictest ] + then + mkdir -p /test_output/statements-test + /clickhouse-tests/sqllogic/runner.py \ + --log-file /test_output/runner-statements-test.log \ + --log-level info \ + statements-test \ + --input-dir /sqllogictest \ + --out-dir /test_output/statements-test \ + 2>&1 \ + | ts '%Y-%m-%d %H:%M:%S' + + cat /test_output/statements-test/check_status.tsv >> /test_output/check_status.tsv + cat /test_output/statements-test/test_results.tsv >> /test_output/test_results.tsv + tar -zcvf statements-check.tar.gz statements-test 1>/dev/null + fi +} + +export -f run_tests + +timeout "${MAX_RUN_TIME:-9000}" bash -c run_tests || echo "timeout reached" >&2 + +#/process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv + +clickhouse-client -q "system flush logs" ||: + +# Stop server so we can safely read data with clickhouse-local. +# Why do we read data with clickhouse-local? +# Because it's the simplest way to read it when server has crashed. +sudo clickhouse stop ||: + +for _ in $(seq 1 60); do if [[ $(wget --timeout=1 -q 'localhost:8123' -O-) == 'Ok.' 
]]; then sleep 1 ; else break; fi ; done + +grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: +pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz & + +# Compressed (FIXME: remove once only github actions will be left) +rm /var/log/clickhouse-server/clickhouse-server.log +mv /var/log/clickhouse-server/stderr.log /test_output/ ||: diff --git a/docker/test/util/process_functional_tests_result.py b/docker/test/util/process_functional_tests_result.py index da58db8e45d..3c1c6e2a795 100755 --- a/docker/test/util/process_functional_tests_result.py +++ b/docker/test/util/process_functional_tests_result.py @@ -18,7 +18,7 @@ SUCCESS_FINISH_SIGNS = ["All tests have finished", "No tests were run"] RETRIES_SIGN = "Some tests were restarted" -def process_test_log(log_path): +def process_test_log(log_path, broken_tests): total = 0 skipped = 0 unknown = 0 @@ -62,8 +62,12 @@ def process_test_log(log_path): failed += 1 test_results.append((test_name, "Timeout", test_time, [])) elif FAIL_SIGN in line: - failed += 1 - test_results.append((test_name, "FAIL", test_time, [])) + if test_name in broken_tests: + success += 1 + test_results.append((test_name, "OK", test_time, [])) + else: + failed += 1 + test_results.append((test_name, "FAIL", test_time, [])) elif UNKNOWN_SIGN in line: unknown += 1 test_results.append((test_name, "FAIL", test_time, [])) @@ -71,8 +75,21 @@ def process_test_log(log_path): skipped += 1 test_results.append((test_name, "SKIPPED", test_time, [])) else: - success += int(OK_SIGN in line) - test_results.append((test_name, "OK", test_time, [])) + if OK_SIGN in line and test_name in broken_tests: + failed += 1 + test_results.append( + ( + test_name, + "FAIL", + test_time, + [ + "Test is expected to fail! Please, update broken_tests.txt!\n" + ], + ) + ) + else: + success += int(OK_SIGN in line) + test_results.append((test_name, "OK", test_time, [])) test_end = False elif ( len(test_results) > 0 and test_results[-1][1] == "FAIL" and not test_end @@ -110,7 +127,7 @@ def process_test_log(log_path): ) -def process_result(result_path): +def process_result(result_path, broken_tests): test_results = [] state = "success" description = "" @@ -134,7 +151,7 @@ def process_result(result_path): success_finish, retries, test_results, - ) = process_test_log(result_path) + ) = process_test_log(result_path, broken_tests) is_flacky_check = 1 < int(os.environ.get("NUM_TRIES", 1)) logging.info("Is flaky check: %s", is_flacky_check) # If no tests were run (success == 0) it indicates an error (e.g. 
server did not start or crashed immediately) @@ -186,9 +203,17 @@ if __name__ == "__main__": parser.add_argument("--in-results-dir", default="/test_output/") parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") + parser.add_argument("--broken-tests", default="/broken_tests.txt") args = parser.parse_args() - state, description, test_results = process_result(args.in_results_dir) + broken_tests = list() + if os.path.exists(args.broken_tests): + logging.info(f"File {args.broken_tests} with broken tests found") + with open(args.broken_tests) as f: + broken_tests = f.read().splitlines() + logging.info(f"Broken tests in the list: {len(broken_tests)}") + + state, description, test_results = process_result(args.in_results_dir, broken_tests) logging.info("Result parsed") status = (state, description) write_results(args.out_results_file, args.out_status_file, test_results, status) diff --git a/docs/changelogs/v22.8.17.17-lts.md b/docs/changelogs/v22.8.17.17-lts.md new file mode 100644 index 00000000000..9c8c3e1839b --- /dev/null +++ b/docs/changelogs/v22.8.17.17-lts.md @@ -0,0 +1,22 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.17.17-lts (df7f2ef0b41) FIXME as compared to v22.8.16.32-lts (7c4be737bd0) + +#### Improvement +* Backported in [#48157](https://github.com/ClickHouse/ClickHouse/issues/48157): Fixed `UNKNOWN_TABLE` exception when attaching to a materialized view that has dependent tables that are not available. This might be useful when trying to restore state from a backup. [#47975](https://github.com/ClickHouse/ClickHouse/pull/47975) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#48957](https://github.com/ClickHouse/ClickHouse/issues/48957): After the recent update, the `dockerd` requires `--tlsverify=false` together with the http port explicitly. [#48924](https://github.com/ClickHouse/ClickHouse/pull/48924) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix explain graph with projection [#47473](https://github.com/ClickHouse/ClickHouse/pull/47473) ([flynn](https://github.com/ucasfl)). +* Remove a feature [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible segfault in cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix bug in Keeper when a node is not created with scheme `auth` in ACL sometimes. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). + diff --git a/docs/changelogs/v23.1.7.30-stable.md b/docs/changelogs/v23.1.7.30-stable.md new file mode 100644 index 00000000000..80f683f6ac8 --- /dev/null +++ b/docs/changelogs/v23.1.7.30-stable.md @@ -0,0 +1,28 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.1.7.30-stable (c94dba6e023) FIXME as compared to v23.1.6.42-stable (783ddf67991) + +#### Improvement +* Backported in [#48161](https://github.com/ClickHouse/ClickHouse/issues/48161): Fixed `UNKNOWN_TABLE` exception when attaching to a materialized view that has dependent tables that are not available. This might be useful when trying to restore state from a backup. 
[#47975](https://github.com/ClickHouse/ClickHouse/pull/47975) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). + +#### Build/Testing/Packaging Improvement +* Backported in [#48585](https://github.com/ClickHouse/ClickHouse/issues/48585): Update time zones. The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#48958](https://github.com/ClickHouse/ClickHouse/issues/48958): After the recent update, the `dockerd` requires `--tlsverify=false` together with the http port explicitly. [#48924](https://github.com/ClickHouse/ClickHouse/pull/48924) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix race in grace hash join with limit [#47153](https://github.com/ClickHouse/ClickHouse/pull/47153) ([Vladimir C](https://github.com/vdimir)). +* Fix explain graph with projection [#47473](https://github.com/ClickHouse/ClickHouse/pull/47473) ([flynn](https://github.com/ucasfl)). +* Fix crash in polygonsSymDifferenceCartesian [#47702](https://github.com/ClickHouse/ClickHouse/pull/47702) ([pufit](https://github.com/pufit)). +* Remove a feature [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* ClickHouse startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix possible segfault in cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix bug in Keeper when a node is not created with scheme `auth` in ACL sometimes. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix IPv4 comparable with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + diff --git a/docs/changelogs/v23.2.6.34-stable.md b/docs/changelogs/v23.2.6.34-stable.md new file mode 100644 index 00000000000..c6f73da843d --- /dev/null +++ b/docs/changelogs/v23.2.6.34-stable.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.2.6.34-stable (570190045b0) FIXME as compared to v23.2.5.46-stable (b50faecbb12) + +#### Improvement +* Backported in [#48709](https://github.com/ClickHouse/ClickHouse/issues/48709): Formatter '%M' in function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using setting "formatdatetime_parsedatetime_m_is_month_name = 0". [#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)). 
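+
+A minimal illustration of the `%M` change described in the entry above (a sketch based on the described behavior, not output copied from a test run):
+
+```sql
+-- After this change, '%M' prints the month name, as in MySQL:
+SELECT formatDateTime(toDateTime('2023-04-01 12:34:56'), '%M'); -- 'April'
+
+-- The previous behavior (minutes) can be restored per query:
+SELECT formatDateTime(toDateTime('2023-04-01 12:34:56'), '%M')
+SETTINGS formatdatetime_parsedatetime_m_is_month_name = 0;      -- '34'
+```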
+ +#### Build/Testing/Packaging Improvement +* Backported in [#48587](https://github.com/ClickHouse/ClickHouse/issues/48587): Update time zones. The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#48959](https://github.com/ClickHouse/ClickHouse/issues/48959): After the recent update, the `dockerd` requires `--tlsverify=false` together with the http port explicitly. [#48924](https://github.com/ClickHouse/ClickHouse/pull/48924) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix race in grace hash join with limit [#47153](https://github.com/ClickHouse/ClickHouse/pull/47153) ([Vladimir C](https://github.com/vdimir)). +* Fix explain graph with projection [#47473](https://github.com/ClickHouse/ClickHouse/pull/47473) ([flynn](https://github.com/ucasfl)). +* Fix crash in polygonsSymDifferenceCartesian [#47702](https://github.com/ClickHouse/ClickHouse/pull/47702) ([pufit](https://github.com/pufit)). +* Remove a feature [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix cpu usage in rabbitmq (was worsened in 23.2 after [#44404](https://github.com/ClickHouse/ClickHouse/issues/44404)) [#48311](https://github.com/ClickHouse/ClickHouse/pull/48311) ([Kseniia Sumarokova](https://github.com/kssenii)). +* ClickHouse startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix possible segfault in cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix bug in Keeper when a node is not created with scheme `auth` in ACL sometimes. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix IPv4 comparable with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + diff --git a/docs/changelogs/v23.3.2.37-lts.md b/docs/changelogs/v23.3.2.37-lts.md new file mode 100644 index 00000000000..69602b573c5 --- /dev/null +++ b/docs/changelogs/v23.3.2.37-lts.md @@ -0,0 +1,35 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.2.37-lts (1b144bcd101) FIXME as compared to v23.3.1.2823-lts (46e85357ce2) + +#### Improvement +* Backported in [#48459](https://github.com/ClickHouse/ClickHouse/issues/48459): Formatter '%M' in function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using setting "formatdatetime_parsedatetime_m_is_month_name = 0". 
[#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#48842](https://github.com/ClickHouse/ClickHouse/issues/48842): Fix some mysql related settings not being handled with mysql dictionary source + named collection. Closes [#48402](https://github.com/ClickHouse/ClickHouse/issues/48402). [#48759](https://github.com/ClickHouse/ClickHouse/pull/48759) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#49035](https://github.com/ClickHouse/ClickHouse/issues/49035): Add fallback to password authentication when authentication with SSL user certificate has failed. Closes [#48974](https://github.com/ClickHouse/ClickHouse/issues/48974). [#48989](https://github.com/ClickHouse/ClickHouse/pull/48989) ([Nikolay Degterinsky](https://github.com/evillique)). + +#### Build/Testing/Packaging Improvement +* Backported in [#48589](https://github.com/ClickHouse/ClickHouse/issues/48589): Update time zones. The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#48960](https://github.com/ClickHouse/ClickHouse/issues/48960): After the recent update, the `dockerd` requires `--tlsverify=false` together with the http port explicitly. [#48924](https://github.com/ClickHouse/ClickHouse/pull/48924) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Remove a feature [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix cpu usage in rabbitmq (was worsened in 23.2 after [#44404](https://github.com/ClickHouse/ClickHouse/issues/44404)) [#48311](https://github.com/ClickHouse/ClickHouse/pull/48311) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix ThreadPool for DistributedSink and use StrongTypedef for CurrentMetrics/ProfileEvents/StatusInfo to avoid further errors [#48314](https://github.com/ClickHouse/ClickHouse/pull/48314) ([Azat Khuzhin](https://github.com/azat)). +* Reset downloader for cache file segment in TemporaryFileStream [#48386](https://github.com/ClickHouse/ClickHouse/pull/48386) ([Vladimir C](https://github.com/vdimir)). +* ClickHouse startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix possible segfault in cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix bug in Keeper when a node is not created with scheme `auth` in ACL sometimes. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). 
+* Fix IPv4 comparable with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Batch fix for projections analysis with analyzer. [#48357](https://github.com/ClickHouse/ClickHouse/pull/48357) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a confusing warning about interserver mode [#48793](https://github.com/ClickHouse/ClickHouse/pull/48793) ([Alexander Tokmakov](https://github.com/tavplubix)).
+
diff --git a/docs/changelogs/v23.4.1.1943-stable.md b/docs/changelogs/v23.4.1.1943-stable.md
new file mode 100644
index 00000000000..ea16f5856be
--- /dev/null
+++ b/docs/changelogs/v23.4.1.1943-stable.md
@@ -0,0 +1,375 @@
+---
+sidebar_position: 1
+sidebar_label: 2023
+---
+
+# 2023 Changelog
+
+### ClickHouse release v23.4.1.1943-stable (3920eb987f7) FIXME as compared to v23.3.1.2823-lts (46e85357ce2)
+
+#### Backward Incompatible Change
+* If `path` in cache configuration is not empty and is not an absolute path, then it will be put in `/caches/`. [#48784](https://github.com/ClickHouse/ClickHouse/pull/48784) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Compatibility setting `parallelize_output_from_storages` to enable behavior before [#48727](https://github.com/ClickHouse/ClickHouse/issues/48727). [#49101](https://github.com/ClickHouse/ClickHouse/pull/49101) ([Igor Nikonov](https://github.com/devcrafter)).
+
+#### New Feature
+* Add `extractKeyValuePairs` function to extract key-value pairs from strings. Input strings might contain noise (i.e. log files / they do not need to be 100% formatted in key-value-pair format); the algorithm will look for key-value pairs matching the arguments passed to the function. As of now, the function accepts the following arguments: `data_column` (mandatory), `key_value_pair_delimiter` (defaults to `:`), `pair_delimiters` (defaults to `\space \, \;`) and `quoting_character` (defaults to double quotes). [#43606](https://github.com/ClickHouse/ClickHouse/pull/43606) ([Arthur Passos](https://github.com/arthurpassos)).
+* Add MemoryTracker for the background tasks (merges and mutations). Introduces `merges_mutations_memory_usage_soft_limit` and `merges_mutations_memory_usage_to_ram_ratio` settings that represent the soft memory limit for merges and mutations. If this limit is reached, ClickHouse won't schedule new merge or mutation tasks. Also, the `MergesMutationsMemoryTracking` metric is introduced to allow observing the current memory usage of background tasks. Closes [#45710](https://github.com/ClickHouse/ClickHouse/issues/45710). [#46089](https://github.com/ClickHouse/ClickHouse/pull/46089) ([Dmitry Novik](https://github.com/novikd)).
+* Support new aggregate functions quantileGK/quantilesGK, like [approx_percentile](https://spark.apache.org/docs/latest/api/sql/index.html#approx_percentile) in Spark. For the Greenwald-Khanna algorithm, refer to http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf. [#46428](https://github.com/ClickHouse/ClickHouse/pull/46428) ([李扬](https://github.com/taiyang-li)).
+* Add statement `SHOW COLUMNS` which shows distilled information from system.columns. [#48017](https://github.com/ClickHouse/ClickHouse/pull/48017) ([Robert Schulze](https://github.com/rschu1ze)).
+* Added `LIGHTWEIGHT` and `PULL` modifiers for `SYSTEM SYNC REPLICA` query. `LIGHTWEIGHT` version waits for fetches and drop-ranges only (merges and mutations are ignored).
`PULL` version pulls new entries from ZooKeeper and does not wait for them. Fixes [#47794](https://github.com/ClickHouse/ClickHouse/issues/47794). [#48085](https://github.com/ClickHouse/ClickHouse/pull/48085) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Add kafkaMurmurHash function for compatibility with Kafka DefaultPartitioner. Closes [#47834](https://github.com/ClickHouse/ClickHouse/issues/47834). [#48185](https://github.com/ClickHouse/ClickHouse/pull/48185) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Allow easily creating a user with the same grants as the current user by using `GRANT CURRENT GRANTS`. [#48262](https://github.com/ClickHouse/ClickHouse/pull/48262) ([pufit](https://github.com/pufit)).
+* Add statistical aggregate function `kolmogorovSmirnovTest`. Close [#48228](https://github.com/ClickHouse/ClickHouse/issues/48228). [#48325](https://github.com/ClickHouse/ClickHouse/pull/48325) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
+* Added a `lost_part_count` column to the `system.replicas` table. The column value shows the total number of lost parts in the corresponding table. The value is stored in ZooKeeper and can be used instead of the non-persistent `ReplicatedDataLoss` profile event for monitoring. [#48526](https://github.com/ClickHouse/ClickHouse/pull/48526) ([Sergei Trifonov](https://github.com/serxa)).
+* Add soundex function. Closes [#39880](https://github.com/ClickHouse/ClickHouse/issues/39880). [#48567](https://github.com/ClickHouse/ClickHouse/pull/48567) ([FriendLey](https://github.com/FriendLey)).
+* Support map type for JSONExtract. [#48629](https://github.com/ClickHouse/ClickHouse/pull/48629) ([李扬](https://github.com/taiyang-li)).
+* Add PrettyJSONEachRow format to output pretty JSON with newline delimiters and 4-space indents. [#48898](https://github.com/ClickHouse/ClickHouse/pull/48898) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add ParquetMetadata input format to read Parquet file metadata. [#48911](https://github.com/ClickHouse/ClickHouse/pull/48911) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### Performance Improvement
+* Reading files in Parquet format is now much faster. IO and decoding are parallelized (controlled by the `max_threads` setting), and only required data ranges are read. [#47964](https://github.com/ClickHouse/ClickHouse/pull/47964) ([Michael Kolupaev](https://github.com/al13n321)).
+* Only check dependencies if necessary when applying `ALTER TABLE` queries. [#48062](https://github.com/ClickHouse/ClickHouse/pull/48062) ([Raúl Marín](https://github.com/Algunenano)).
+* Optimize function `mapUpdate`. [#48118](https://github.com/ClickHouse/ClickHouse/pull/48118) ([Anton Popov](https://github.com/CurtizJ)).
+* Now an internal query to the local replica is sent explicitly, and data from it is received through the loopback interface. Setting `prefer_localhost_replica` is not respected for parallel replicas. This is needed for better scheduling and makes the code cleaner: the initiator is only responsible for coordinating the reading process and merging results, continuously answering requests while all the secondary queries read the data. Note: using the loopback interface is not as performant, but otherwise some replicas could starve for tasks, which could lead to even slower query execution and not utilizing all possible resources. The initialization of the coordinator is now even more lazy.
All incoming requests contain the information about the reading algorithm, and we initialize the coordinator with it when the first request comes. If any replica decides to read with a different algorithm, an exception will be thrown and the query will be aborted. [#48246](https://github.com/ClickHouse/ClickHouse/pull/48246) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Do not build a set for the right side of an `IN` clause with a subquery when it is used only for analysis of skip indexes and they are disabled by setting (`use_skip_indexes=0`). Previously it might affect the performance of queries. [#48299](https://github.com/ClickHouse/ClickHouse/pull/48299) ([Anton Popov](https://github.com/CurtizJ)).
+* Query processing is parallelized right after reading `FROM file(...)`. Related to [#38755](https://github.com/ClickHouse/ClickHouse/issues/38755). [#48525](https://github.com/ClickHouse/ClickHouse/pull/48525) ([Igor Nikonov](https://github.com/devcrafter)).
+* Query processing is parallelized right after reading from a data source. Affected data sources are mostly simple or external storages like table functions `url`, `file`. [#48727](https://github.com/ClickHouse/ClickHouse/pull/48727) ([Igor Nikonov](https://github.com/devcrafter)).
+* Use the correct memory order for the counter in `numbers_mt()`. [#48729](https://github.com/ClickHouse/ClickHouse/pull/48729) ([Igor Nikonov](https://github.com/devcrafter)).
+* Lowered contention of the ThreadPool mutex (may increase performance for a huge amount of small jobs). [#48750](https://github.com/ClickHouse/ClickHouse/pull/48750) ([Sergei Trifonov](https://github.com/serxa)).
+* Simplify accounting of approximate size of granule in prefetched read pool. [#49051](https://github.com/ClickHouse/ClickHouse/pull/49051) ([Nikita Taranov](https://github.com/nickitat)).
+
+#### Improvement
+* Support config sections `keeper`/`keeper_server` as an alternative to `zookeeper`. Close [#34766](https://github.com/ClickHouse/ClickHouse/issues/34766), [#34767](https://github.com/ClickHouse/ClickHouse/issues/34767). [#35113](https://github.com/ClickHouse/ClickHouse/pull/35113) ([李扬](https://github.com/taiyang-li)).
+* Many issues in ClickHouse applications' help were fixed. Help is now written to stdout from all tools. Status code for `clickhouse help` invocation is now 0. Updated help for `clickhouse-local`, `clickhouse-benchmark`, `clickhouse-client`, `clickhouse hash`, `clickhouse su`, `clickhouse-install`. [#45819](https://github.com/ClickHouse/ClickHouse/pull/45819) ([Ilya Yatsishin](https://github.com/qoega)).
+* Entries in the query cache are now squashed to max_block_size and compressed. [#45912](https://github.com/ClickHouse/ClickHouse/pull/45912) ([Robert Schulze](https://github.com/rschu1ze)).
+* It is possible to set the _secure_ flag in named_collections for a dictionary with a ClickHouse table source. Addresses [#38450](https://github.com/ClickHouse/ClickHouse/issues/38450). [#46323](https://github.com/ClickHouse/ClickHouse/pull/46323) ([Ilya Golshtein](https://github.com/ilejn)).
+* Functions replaceOne(), replaceAll(), replaceRegexpOne() and replaceRegexpAll() can now be called with non-const pattern and replacement arguments (see the sketch below). [#46589](https://github.com/ClickHouse/ClickHouse/pull/46589) ([Robert Schulze](https://github.com/rschu1ze)).
+* Bump internal ZSTD from 1.5.4 to 1.5.5. [#46797](https://github.com/ClickHouse/ClickHouse/pull/46797) ([Robert Schulze](https://github.com/rschu1ze)).
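+
+A sketch of the non-const pattern/replacement support from the `replaceOne()`/`replaceRegexpAll()` entry above (the sample table comes from the `values` table function and is purely illustrative):
+
+```sql
+-- The pattern and the replacement can now come from columns, not only from constants:
+SELECT replaceRegexpAll(s, pat, repl)
+FROM values('s String, pat String, repl String',
+            ('hello world', 'o',   '0'),
+            ('foo bar',     'b.r', 'baz'));
+-- -> 'hell0 w0rld', 'foo baz'
+```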
+* If we run a mutation with IN (subquery) like this: `ALTER TABLE t UPDATE col='new value' WHERE id IN (SELECT id FROM huge_table)` and the table `t` has multiple parts, then for each part a set for the subquery `SELECT id FROM huge_table` is built in memory. And if there are many parts, this might consume a lot of memory (and lead to an OOM) and CPU. The solution is to introduce a short-lived cache of sets that are currently being built by mutation tasks. If another task of the same mutation is executed concurrently, it can look up the set in the cache, wait for it to be built, and reuse it. [#46835](https://github.com/ClickHouse/ClickHouse/pull/46835) ([Alexander Gololobov](https://github.com/davenger)).
+* Added configurable retries for all operations with [Zoo]Keeper for Backup queries. [#47224](https://github.com/ClickHouse/ClickHouse/pull/47224) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Add async connection to socket and async writing to socket. Make creating connections and sending query/external tables async across shards. Refactor code with fibers. Closes [#46931](https://github.com/ClickHouse/ClickHouse/issues/46931). We will be able to increase `connect_timeout_with_failover_ms` by default after this PR (https://github.com/ClickHouse/ClickHouse/issues/5188). [#47229](https://github.com/ClickHouse/ClickHouse/pull/47229) ([Kruglov Pavel](https://github.com/Avogar)).
+* Formatter '%M' in function formatDateTime() now prints the month name instead of the minutes. This makes the behavior consistent with MySQL. The previous behavior can be restored using setting "formatdatetime_parsedatetime_m_is_month_name = 0". [#47246](https://github.com/ClickHouse/ClickHouse/pull/47246) ([Robert Schulze](https://github.com/rschu1ze)).
+* Several improvements around data lakes: - Make StorageIceberg work with non-partitioned data. - Support Iceberg format version V2 (previously only V1 was supported) - Support reading partitioned data for DeltaLake/Hudi - Faster reading of DeltaLake metadata by using Delta's checkpoint files - Fixed incorrect Hudi reads: previously it incorrectly chose which data to read and therefore was able to read correctly only small tables - Made these engines pick up updates of changed data (previously the state was set on table creation) - Make proper testing for Iceberg/DeltaLake/Hudi using Spark. [#47307](https://github.com/ClickHouse/ClickHouse/pull/47307) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Enable `use_environment_credentials` for S3 by default, so the entire provider chain is constructed by default. [#47397](https://github.com/ClickHouse/ClickHouse/pull/47397) ([Antonio Andelic](https://github.com/antonio2368)).
+* The JSON_VALUE function is similar to Spark's get_json_object function, which supports getting a value from a JSON string by a path like '$.key'. But there are still differences: 1. Spark's get_json_object returns null when the path does not exist, while JSON_VALUE returns an empty string; 2. Spark's get_json_object returns a complex type value, such as a JSON object/array value, while JSON_VALUE returns an empty string. [#47494](https://github.com/ClickHouse/ClickHouse/pull/47494) ([KevinyhZou](https://github.com/KevinyhZou)).
+* Add CNF/constraint optimizer in the new analyzer. [#47617](https://github.com/ClickHouse/ClickHouse/pull/47617) ([Antonio Andelic](https://github.com/antonio2368)).
+* More flexible propagation of the insert table structure to table functions when `use_structure_from_insertion_table_in_table_functions` is used. Fixed a bug with name mapping and the use of virtual columns. There is no more need for the 'auto' setting. [#47962](https://github.com/ClickHouse/ClickHouse/pull/47962) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Do not continue retrying to connect to ZK if the query is killed or over limits. [#47985](https://github.com/ClickHouse/ClickHouse/pull/47985) ([Raúl Marín](https://github.com/Algunenano)).
+* Added functions to work with columns of type `Map`: `mapConcat`, `mapSort`, `mapExists` (see the sketch below). [#48071](https://github.com/ClickHouse/ClickHouse/pull/48071) ([Anton Popov](https://github.com/CurtizJ)).
+* Support Enum output/input in BSONEachRow, allow all map key types and avoid extra calculations on output. [#48122](https://github.com/ClickHouse/ClickHouse/pull/48122) ([Kruglov Pavel](https://github.com/Avogar)).
+* Support more ClickHouse types in ORC/Arrow/Parquet formats: Enum(8|16), (U)Int(128|256), Decimal256 (for ORC), allow reading IPv4 from Int32 values (ORC outputs IPv4 as Int32 and we couldn't read it back), fix reading Nullable(IPv6) from binary data for ORC. [#48126](https://github.com/ClickHouse/ClickHouse/pull/48126) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add columns `perform_ttl_move_on_insert`, `load_balancing` to table `system.storage_policies`, change the type of column `volume_type` to `enum8`. [#48167](https://github.com/ClickHouse/ClickHouse/pull/48167) ([lizhuoyu5](https://github.com/lzydmxy)).
+* Added support for the `BACKUP ALL` command which backs up all tables and databases, including temporary and system ones. [#48189](https://github.com/ClickHouse/ClickHouse/pull/48189) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Function mapFromArrays now supports map type as input. [#48207](https://github.com/ClickHouse/ClickHouse/pull/48207) ([李扬](https://github.com/taiyang-li)).
+* The output of some SHOW PROCESSLIST is now sorted. [#48241](https://github.com/ClickHouse/ClickHouse/pull/48241) ([Robert Schulze](https://github.com/rschu1ze)).
+* Per-query/per-server throttling for remote IO/local IO/BACKUPs (server settings: `max_remote_read_network_bandwidth_for_server`, `max_remote_write_network_bandwidth_for_server`, `max_local_read_bandwidth_for_server`, `max_local_write_bandwidth_for_server`, `max_backup_bandwidth_for_server`, settings: `max_remote_read_network_bandwidth`, `max_remote_write_network_bandwidth`, `max_local_read_bandwidth`, `max_local_write_bandwidth`, `max_backup_bandwidth`). [#48242](https://github.com/ClickHouse/ClickHouse/pull/48242) ([Azat Khuzhin](https://github.com/azat)).
+* Support more types in CapnProto format: Map, (U)Int(128|256), Decimal(128|256). Allow integer conversions during input/output. [#48257](https://github.com/ClickHouse/ClickHouse/pull/48257) ([Kruglov Pavel](https://github.com/Avogar)).
+* It is now possible to define per-user quotas in the query cache. [#48284](https://github.com/ClickHouse/ClickHouse/pull/48284) ([Robert Schulze](https://github.com/rschu1ze)).
+* Don't throw CURRENT_WRITE_BUFFER_IS_EXHAUSTED for normal behaviour. [#48288](https://github.com/ClickHouse/ClickHouse/pull/48288) ([Raúl Marín](https://github.com/Algunenano)).
+* Add new setting `keeper_map_strict_mode` which enforces extra guarantees on operations made on top of `KeeperMap` tables. [#48293](https://github.com/ClickHouse/ClickHouse/pull/48293) ([Antonio Andelic](https://github.com/antonio2368)).
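+
+A usage sketch for the new `Map` functions mentioned above (values are illustrative):
+
+```sql
+SELECT mapConcat(map('a', 1), map('b', 2));             -- {'a':1,'b':2}
+SELECT mapSort(map('b', 2, 'a', 1));                    -- {'a':1,'b':2}, sorted by key
+SELECT mapExists((k, v) -> v > 1, map('a', 1, 'b', 2)); -- 1, some value satisfies v > 1
+```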
+* Check that the primary key type for a simple dictionary is a native unsigned integer type. Added setting `check_dictionary_primary_key` for compatibility (set `check_dictionary_primary_key = false` to disable the check). [#48335](https://github.com/ClickHouse/ClickHouse/pull/48335) ([lizhuoyu5](https://github.com/lzydmxy)).
+* Don't replicate mutations for `KeeperMap` because it's unnecessary. [#48354](https://github.com/ClickHouse/ClickHouse/pull/48354) ([Antonio Andelic](https://github.com/antonio2368)).
+* Allow writing/reading an unnamed tuple as a nested Message in Protobuf format. Tuple elements and Message fields are matched by position. [#48390](https://github.com/ClickHouse/ClickHouse/pull/48390) ([Kruglov Pavel](https://github.com/Avogar)).
+* Support `additional_table_filters` and `additional_result_filter` settings in the new planner. Also, add a documentation entry for `additional_result_filter`. [#48405](https://github.com/ClickHouse/ClickHouse/pull/48405) ([Dmitry Novik](https://github.com/novikd)).
+* parseDateTime() now understands format string '%f' (fractional seconds). [#48420](https://github.com/ClickHouse/ClickHouse/pull/48420) ([Robert Schulze](https://github.com/rschu1ze)).
+* Format string "%f" in formatDateTime() now prints "000000" if the formatted value has no fractional seconds; the previous behavior (single zero) can be restored using setting "formatdatetime_f_prints_single_zero = 1". [#48422](https://github.com/ClickHouse/ClickHouse/pull/48422) ([Robert Schulze](https://github.com/rschu1ze)).
+* Don't replicate DELETE and TRUNCATE for KeeperMap. [#48434](https://github.com/ClickHouse/ClickHouse/pull/48434) ([Antonio Andelic](https://github.com/antonio2368)).
+* Generate valid Decimals and Bools in the generateRandom function. [#48436](https://github.com/ClickHouse/ClickHouse/pull/48436) ([Kruglov Pavel](https://github.com/Avogar)).
+* Allow trailing commas in the expression list of a SELECT query, for example `SELECT a, b, c, FROM table`. Closes [#37802](https://github.com/ClickHouse/ClickHouse/issues/37802). [#48438](https://github.com/ClickHouse/ClickHouse/pull/48438) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Override `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables with `--user` and `--password` client parameters. Closes [#38909](https://github.com/ClickHouse/ClickHouse/issues/38909). [#48440](https://github.com/ClickHouse/ClickHouse/pull/48440) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Added retries to loading of data parts in `MergeTree` tables in case of retryable errors. [#48442](https://github.com/ClickHouse/ClickHouse/pull/48442) ([Anton Popov](https://github.com/CurtizJ)).
+* Add support for `Date`, `Date32`, `DateTime`, `DateTime64` data types to `arrayMin`, `arrayMax`, `arrayDifference` functions. Closes [#21645](https://github.com/ClickHouse/ClickHouse/issues/21645). [#48445](https://github.com/ClickHouse/ClickHouse/pull/48445) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Reduce memory usage for multiple `ALTER DELETE` mutations. [#48522](https://github.com/ClickHouse/ClickHouse/pull/48522) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Primary/secondary indices and sorting keys with identical expressions are now rejected. This behavior can be disabled using setting `allow_suspicious_indices`. [#48536](https://github.com/ClickHouse/ClickHouse/pull/48536) ([凌涛](https://github.com/lingtaolf)).
+* Just fix a small typo in the comment around the `lockForAlter` method in `IStorage.h`.
[#48559](https://github.com/ClickHouse/ClickHouse/pull/48559) ([artem-pershin](https://github.com/artem-pershin)).
+* Add support for the `{server_uuid}` macro. It is useful for identifying replicas in autoscaled clusters when new replicas are constantly added and removed at runtime. This closes [#48554](https://github.com/ClickHouse/ClickHouse/issues/48554). [#48563](https://github.com/ClickHouse/ClickHouse/pull/48563) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The installation script will create a hard link instead of copying if it is possible. [#48578](https://github.com/ClickHouse/ClickHouse/pull/48578) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Support `SHOW TABLE` syntax meaning the same as `SHOW CREATE TABLE` (see the sketch below). Closes [#48580](https://github.com/ClickHouse/ClickHouse/issues/48580). [#48591](https://github.com/ClickHouse/ClickHouse/pull/48591) ([flynn](https://github.com/ucasfl)).
+* HTTP temporary buffers now support working with the filesystem cache. [#48664](https://github.com/ClickHouse/ClickHouse/pull/48664) ([Vladimir C](https://github.com/vdimir)).
+* Make schema inference work for `CREATE AS SELECT`. Closes [#47599](https://github.com/ClickHouse/ClickHouse/issues/47599). [#48679](https://github.com/ClickHouse/ClickHouse/pull/48679) ([flynn](https://github.com/ucasfl)).
+* Added a `replicated_max_mutations_in_one_entry` setting for `ReplicatedMergeTree` that allows limiting the number of mutation commands per one `MUTATE_PART` entry (default is 10000). [#48731](https://github.com/ClickHouse/ClickHouse/pull/48731) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* In AggregateFunction types, don't count unused arena bytes as `read_bytes`. [#48745](https://github.com/ClickHouse/ClickHouse/pull/48745) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix some mysql-related settings not being handled with mysql dictionary source + named collection. Closes [#48402](https://github.com/ClickHouse/ClickHouse/issues/48402). [#48759](https://github.com/ClickHouse/ClickHouse/pull/48759) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix squashing in query cache. [#48763](https://github.com/ClickHouse/ClickHouse/pull/48763) ([Robert Schulze](https://github.com/rschu1ze)).
+* Support the following new JSONPath formats: '$.1key' (the path element begins with a number); '$[key]', '$[“key”]', '$[\\\'key\\\']', '$["key 123"]' (the path element is enclosed in []). [#48768](https://github.com/ClickHouse/ClickHouse/pull/48768) ([lgbo](https://github.com/lgbo-ustc)).
+* If a user sets `max_single_part_upload_size` to a very large value, it can lead to a crash due to a bug in the AWS S3 SDK. This fixes [#47679](https://github.com/ClickHouse/ClickHouse/issues/47679). [#48816](https://github.com/ClickHouse/ClickHouse/pull/48816) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Not for changelog. [#48824](https://github.com/ClickHouse/ClickHouse/pull/48824) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix data race in `StorageRabbitMQ` ([report](https://pastila.nl/?004f7100/de1505289ab5bb355e67ebe6c7cc8707)), refactor the code. [#48845](https://github.com/ClickHouse/ClickHouse/pull/48845) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add aliases `name` and `part_name` for `system.parts` and `system.part_log`. Closes [#48718](https://github.com/ClickHouse/ClickHouse/issues/48718). [#48850](https://github.com/ClickHouse/ClickHouse/pull/48850) ([sichenzhao](https://github.com/sichenzhao)).
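+
+A sketch of the `SHOW TABLE` shorthand mentioned above (the table is hypothetical):
+
+```sql
+CREATE TABLE t (x UInt64) ENGINE = MergeTree ORDER BY x;
+SHOW TABLE t; -- prints the same statement as SHOW CREATE TABLE t
+```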
+* Functions "arrayDifferenceSupport()", "arrayCumSum()" and "arrayCumSumNonNegative()" now support input arrays of wide integer types (U)Int128/256. [#48866](https://github.com/ClickHouse/ClickHouse/pull/48866) ([cluster](https://github.com/infdahai)). +* Multi-line history in clickhouse-client is now no longer padded. This makes pasting more natural. [#48870](https://github.com/ClickHouse/ClickHouse/pull/48870) ([Joanna Hulboj](https://github.com/jh0x)). +* Not for changelog. [#48873](https://github.com/ClickHouse/ClickHouse/pull/48873) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Implement a slight improvement for the rare case when ClickHouse is run inside LXC and LXCFS is used. The LXCFS has an issue: sometimes it returns an error "Transport endpoint is not connected" on reading from the file inside `/proc`. This error was correctly logged into ClickHouse's server log. We have additionally workaround this issue by reopening a file. This is a minuscule change. [#48922](https://github.com/ClickHouse/ClickHouse/pull/48922) ([Real](https://github.com/RunningXie)). +* Improve memory accounting for prefetches. Randomise prefetch settings In CI. [#48973](https://github.com/ClickHouse/ClickHouse/pull/48973) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Correctly set headers for native copy operations on GCS. [#48981](https://github.com/ClickHouse/ClickHouse/pull/48981) ([Antonio Andelic](https://github.com/antonio2368)). +* Add support for specifying setting names in the command line with dashes instead of underscores, for example, `--max-threads` instead of `--max_threads`. Additionally, support Unicode dash characters like `—` instead of `--` - this is useful when you communicate with a team in another company, and a manager from that team copy-pasted code from MS Word. [#48985](https://github.com/ClickHouse/ClickHouse/pull/48985) ([alekseygolub](https://github.com/alekseygolub)). +* Add fallback to password authentication when authentication with SSL user certificate has failed. Closes [#48974](https://github.com/ClickHouse/ClickHouse/issues/48974). [#48989](https://github.com/ClickHouse/ClickHouse/pull/48989) ([Nikolay Degterinsky](https://github.com/evillique)). +* Increase default value for `connect_timeout_with_failover_ms` to 1000 ms (because of adding async connections in https://github.com/ClickHouse/ClickHouse/pull/47229) . Closes [#5188](https://github.com/ClickHouse/ClickHouse/issues/5188). [#49009](https://github.com/ClickHouse/ClickHouse/pull/49009) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve the embedded dashboard. Close [#46671](https://github.com/ClickHouse/ClickHouse/issues/46671). [#49036](https://github.com/ClickHouse/ClickHouse/pull/49036) ([Kevin Zhang](https://github.com/Kinzeng)). +* Add profile events for log messages, so you can easily see the count of log messages by severity. [#49042](https://github.com/ClickHouse/ClickHouse/pull/49042) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* `bitCount` function support `FixedString` and `String` data type. [#49044](https://github.com/ClickHouse/ClickHouse/pull/49044) ([flynn](https://github.com/ucasfl)). +* In previous versions, the `LineAsString` format worked inconsistently when the parallel parsing was enabled or not, in presence of DOS or MacOS Classic line breaks. This closes [#49039](https://github.com/ClickHouse/ClickHouse/issues/49039). [#49052](https://github.com/ClickHouse/ClickHouse/pull/49052) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* The exception message about the unparsed query parameter will also include the name of the parameter. Reimplement [#48878](https://github.com/ClickHouse/ClickHouse/issues/48878). Close [#48772](https://github.com/ClickHouse/ClickHouse/issues/48772). [#49061](https://github.com/ClickHouse/ClickHouse/pull/49061) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added a `rows` field with the number of rows parsed from an asynchronous insert to `system.asynchronous_insert_log`. [#49120](https://github.com/ClickHouse/ClickHouse/pull/49120) ([Anton Popov](https://github.com/CurtizJ)). +* 1. Bump Intel QPL from v1.0.0 to v1.1.0 (fixes build issue [#47877](https://github.com/ClickHouse/ClickHouse/issues/47877)). 2. The DEFLATE_QPL codec now respects the maximum hardware jobs returned by libaccel_config. [#49126](https://github.com/ClickHouse/ClickHouse/pull/49126) ([jasperzhu](https://github.com/jinjunzh)). + +#### Build/Testing/Packaging Improvement +* Reduce the number of dependencies in the header files to speed up the build. [#47984](https://github.com/ClickHouse/ClickHouse/pull/47984) ([Dmitry Novik](https://github.com/novikd)). +* Randomize compression of marks and indices in tests. [#48286](https://github.com/ClickHouse/ClickHouse/pull/48286) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Randomize vertical merges from compact to wide parts in tests. [#48287](https://github.com/ClickHouse/ClickHouse/pull/48287) ([Raúl Marín](https://github.com/Algunenano)). +* With the current approach, all ports are calculated at the beginning and could overlap or even be hijacked, see [the report](https://s3.amazonaws.com/clickhouse-test-reports/46793/02928ae50c52f31ce8e5bfa99eb1b5db046f4a4f/integration_tests__release__[1/2]/integration_run_parallel8_0.log) for `port is already allocated`. It's possibly the reason for [#45368](https://github.com/ClickHouse/ClickHouse/issues/45368). [#48393](https://github.com/ClickHouse/ClickHouse/pull/48393) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update time zones. The following were updated: Africa/Cairo, Africa/Casablanca, Africa/El_Aaiun, America/Bogota, America/Cambridge_Bay, America/Ciudad_Juarez, America/Godthab, America/Inuvik, America/Iqaluit, America/Nuuk, America/Ojinaga, America/Pangnirtung, America/Rankin_Inlet, America/Resolute, America/Whitehorse, America/Yellowknife, Asia/Gaza, Asia/Hebron, Asia/Kuala_Lumpur, Asia/Singapore, Canada/Yukon, Egypt, Europe/Kirov, Europe/Volgograd, Singapore. [#48572](https://github.com/ClickHouse/ClickHouse/pull/48572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Support for CRC32 checksum in HDFS. Fix performance issues. [#48614](https://github.com/ClickHouse/ClickHouse/pull/48614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove remainders of GCC support. [#48671](https://github.com/ClickHouse/ClickHouse/pull/48671) ([Robert Schulze](https://github.com/rschu1ze)). +* Add CI run with new analyzer infrastructure enabled. [#48719](https://github.com/ClickHouse/ClickHouse/pull/48719) ([Dmitry Novik](https://github.com/novikd)). +* Not for changelog. [#48879](https://github.com/ClickHouse/ClickHouse/pull/48879) ([larryluogit](https://github.com/larryluogit)). +* After the recent update, `dockerd` requires `--tlsverify=false` together with the HTTP port explicitly. [#48924](https://github.com/ClickHouse/ClickHouse/pull/48924) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Run more functional tests concurrently. 
[#48970](https://github.com/ClickHouse/ClickHouse/pull/48970) ([alesapin](https://github.com/alesapin)). +* Fix glibc compatibility check: replace `preadv` from musl. [#49144](https://github.com/ClickHouse/ClickHouse/pull/49144) ([alesapin](https://github.com/alesapin)). +* Use position-independent code for sanitizer builds (at least msan) to avoid issues with the maximum relocation size. [#49145](https://github.com/ClickHouse/ClickHouse/pull/49145) ([alesapin](https://github.com/alesapin)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix system.query_views_log for MVs that are pushed from background threads [#46668](https://github.com/ClickHouse/ClickHouse/pull/46668) ([Azat Khuzhin](https://github.com/azat)). +* Fix several `RENAME COLUMN` bugs [#46946](https://github.com/ClickHouse/ClickHouse/pull/46946) ([alesapin](https://github.com/alesapin)). +* Fix minor highlighting issues in clickhouse-format [#47610](https://github.com/ClickHouse/ClickHouse/pull/47610) ([Natasha Murashkina](https://github.com/murfel)). +* Fix crash when uploading parts whose size is greater than INT_MAX to S3 [#47693](https://github.com/ClickHouse/ClickHouse/pull/47693) ([Azat Khuzhin](https://github.com/azat)). +* Fix overflow in sparkbar function [#48121](https://github.com/ClickHouse/ClickHouse/pull/48121) ([Vladimir C](https://github.com/vdimir)). +* Fix race in StorageS3 [#48190](https://github.com/ClickHouse/ClickHouse/pull/48190) ([Anton Popov](https://github.com/CurtizJ)). +* Remove a feature [#48195](https://github.com/ClickHouse/ClickHouse/pull/48195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix alter formatting (minor) [#48289](https://github.com/ClickHouse/ClickHouse/pull/48289) ([Natasha Murashkina](https://github.com/murfel)). +* Fix CPU usage in RabbitMQ (was worsened in 23.2 after [#44404](https://github.com/ClickHouse/ClickHouse/issues/44404)) [#48311](https://github.com/ClickHouse/ClickHouse/pull/48311) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix ThreadPool for DistributedSink and use StrongTypedef for CurrentMetrics/ProfileEvents/StatusInfo to avoid further errors [#48314](https://github.com/ClickHouse/ClickHouse/pull/48314) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash in EXPLAIN PIPELINE for Merge over Distributed [#48320](https://github.com/ClickHouse/ClickHouse/pull/48320) ([Azat Khuzhin](https://github.com/azat)). +* Check node for Backup Restore concurrency [#48342](https://github.com/ClickHouse/ClickHouse/pull/48342) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Close client [#48347](https://github.com/ClickHouse/ClickHouse/pull/48347) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix serializing LowCardinality as Arrow dictionary [#48361](https://github.com/ClickHouse/ClickHouse/pull/48361) ([Kruglov Pavel](https://github.com/Avogar)). +* Reset downloader for cache file segment in TemporaryFileStream [#48386](https://github.com/ClickHouse/ClickHouse/pull/48386) ([Vladimir C](https://github.com/vdimir)). +* Fix possible SYSTEM SYNC REPLICA getting stuck in case of DROP/REPLACE PARTITION [#48391](https://github.com/ClickHouse/ClickHouse/pull/48391) ([Azat Khuzhin](https://github.com/azat)). +* Fix ClickHouse startup error when loading a distributed table that depends on a dictionary [#48419](https://github.com/ClickHouse/ClickHouse/pull/48419) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). 
+* Don't check dependencies when renaming system tables automatically [#48431](https://github.com/ClickHouse/ClickHouse/pull/48431) ([Raúl Marín](https://github.com/Algunenano)). +* Some fixes for parallel replicas [#48433](https://github.com/ClickHouse/ClickHouse/pull/48433) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Update only affected rows in KV storage [#48435](https://github.com/ClickHouse/ClickHouse/pull/48435) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix possible segfault in cache [#48469](https://github.com/ClickHouse/ClickHouse/pull/48469) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Make the toTimeZone function throw an error when no constant string is provided [#48471](https://github.com/ClickHouse/ClickHouse/pull/48471) ([Jordi Villar](https://github.com/jrdi)). +* Fix logical error with IPv4 in Protobuf, add support for Date32 [#48486](https://github.com/ClickHouse/ClickHouse/pull/48486) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix the "changed" flag in system.settings being calculated incorrectly for settings with multiple values [#48516](https://github.com/ClickHouse/ClickHouse/pull/48516) ([MikhailBurdukov](https://github.com/MikhailBurdukov)). +* Fix storage `Memory` with enabled compression [#48517](https://github.com/ClickHouse/ClickHouse/pull/48517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix bracketed-paste mode messing up password input in client reconnect [#48528](https://github.com/ClickHouse/ClickHouse/pull/48528) ([Michael Kolupaev](https://github.com/al13n321)). +* Avoid sending `nullptr` to `memcpy` in `copyStringInArena` [#48532](https://github.com/ClickHouse/ClickHouse/pull/48532) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix nested map for keys of IP and UUID types [#48556](https://github.com/ClickHouse/ClickHouse/pull/48556) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix uncaught exception in case of parallel loader for hashed dictionaries [#48571](https://github.com/ClickHouse/ClickHouse/pull/48571) ([Azat Khuzhin](https://github.com/azat)). +* The `groupArray` return value cannot be nullable [#48593](https://github.com/ClickHouse/ClickHouse/pull/48593) ([lgbo](https://github.com/lgbo-ustc)). +* Fix a bug in Keeper where a node was sometimes not created with scheme `auth` in the ACL. [#48595](https://github.com/ClickHouse/ClickHouse/pull/48595) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Make IPv4 comparable with UInt [#48611](https://github.com/ClickHouse/ClickHouse/pull/48611) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix possible error from cache [#48636](https://github.com/ClickHouse/ClickHouse/pull/48636) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix async inserts with empty data [#48663](https://github.com/ClickHouse/ClickHouse/pull/48663) ([Anton Popov](https://github.com/CurtizJ)). +* Fix table dependencies in case of failed RENAME TABLE [#48683](https://github.com/ClickHouse/ClickHouse/pull/48683) ([Azat Khuzhin](https://github.com/azat)). +* Fix zero-copy-replication on encrypted disks. [#48741](https://github.com/ClickHouse/ClickHouse/pull/48741) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix skip_unavailable_shards in case of unavailable hosts [#48771](https://github.com/ClickHouse/ClickHouse/pull/48771) ([Azat Khuzhin](https://github.com/azat)). +* Fix key condition on duplicate primary keys [#48838](https://github.com/ClickHouse/ClickHouse/pull/48838) ([Amos Bird](https://github.com/amosbird)). 
+* Fix for race in ZooKeeper when joining send_thread/receive_thread [#48849](https://github.com/ClickHouse/ClickHouse/pull/48849) ([Alexander Gololobov](https://github.com/davenger)). +* Fix unexpected part name error when trying to drop an ignored detached part with zero copy replication [#48862](https://github.com/ClickHouse/ClickHouse/pull/48862) ([Michael Lex](https://github.com/mlex)). +* Fix reading a Date32 Parquet/Arrow column into a non-Date32 column [#48864](https://github.com/ClickHouse/ClickHouse/pull/48864) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix UNKNOWN_IDENTIFIER error when selecting from a table with a row policy and a column with dots [#48976](https://github.com/ClickHouse/ClickHouse/pull/48976) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix aggregate empty string error [#48999](https://github.com/ClickHouse/ClickHouse/pull/48999) ([LiuNeng](https://github.com/liuneng1994)). +* Fix postgres database setting [#49100](https://github.com/ClickHouse/ClickHouse/pull/49100) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix flaky test_cache_with_full_disk_space [#49110](https://github.com/ClickHouse/ClickHouse/pull/49110) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix "prepared statement insert already exists" [#49154](https://github.com/ClickHouse/ClickHouse/pull/49154) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix replace[Regexp]{One,All}() with const haystacks [#49220](https://github.com/ClickHouse/ClickHouse/pull/49220) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Build Improvement + +* Fixed hashing issue in creating partition IDs for s390x. [#48134](https://github.com/ClickHouse/ClickHouse/pull/48134) ([Harry Lee](https://github.com/HarryLeeIBM)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Randomize JIT settings in tests"'. [#48277](https://github.com/ClickHouse/ClickHouse/pull/48277) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Fix test "02494_query_cache_drop.sql"'. [#48358](https://github.com/ClickHouse/ClickHouse/pull/48358) ([Anton Popov](https://github.com/CurtizJ)). +* NO CL ENTRY: 'Revert "Check simple dictionary key is native unsigned integer"'. [#48732](https://github.com/ClickHouse/ClickHouse/pull/48732) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Make Schema inference works for CREATE AS SELECT"'. [#48758](https://github.com/ClickHouse/ClickHouse/pull/48758) ([pufit](https://github.com/pufit)). +* NO CL ENTRY: 'Revert "Add MemoryTracker for the background tasks"'. [#48760](https://github.com/ClickHouse/ClickHouse/pull/48760) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Added tests for ClickHouse apps help and fixed help issues"'. [#48991](https://github.com/ClickHouse/ClickHouse/pull/48991) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Adapt marks count for prefetch read pool"'. [#49068](https://github.com/ClickHouse/ClickHouse/pull/49068) ([Nikita Taranov](https://github.com/nickitat)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* merge and mutation make thread group for setting memory trackers right [#47104](https://github.com/ClickHouse/ClickHouse/pull/47104) ([Sema Checherinda](https://github.com/CheSema)). +* Query plan: update sort description [#47319](https://github.com/ClickHouse/ClickHouse/pull/47319) ([Igor Nikonov](https://github.com/devcrafter)). 
+* Sqllogic [#47784](https://github.com/ClickHouse/ClickHouse/pull/47784) ([Sema Checherinda](https://github.com/CheSema)). +* Fix race between DROP MatView and RESTART REPLICAS [#47863](https://github.com/ClickHouse/ClickHouse/pull/47863) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Merge [#35113](https://github.com/ClickHouse/ClickHouse/issues/35113) [#47934](https://github.com/ClickHouse/ClickHouse/pull/47934) ([Antonio Andelic](https://github.com/antonio2368)). +* Add a test for ClientInfo initial_query_start_time in inter-server mode [#48036](https://github.com/ClickHouse/ClickHouse/pull/48036) ([Azat Khuzhin](https://github.com/azat)). +* Make custom key for parallel replicas work in new analyzer [#48054](https://github.com/ClickHouse/ClickHouse/pull/48054) ([Antonio Andelic](https://github.com/antonio2368)). +* Throw an exception when non-parametric functions have parameters [#48115](https://github.com/ClickHouse/ClickHouse/pull/48115) ([save-my-heart](https://github.com/save-my-heart)). +* Move FunctionsJSON implementation to header file [#48142](https://github.com/ClickHouse/ClickHouse/pull/48142) ([DimasKovas](https://github.com/DimasKovas)). +* Use ThreadPool in PipelineExecutor [#48146](https://github.com/ClickHouse/ClickHouse/pull/48146) ([Azat Khuzhin](https://github.com/azat)). +* Add sanity checks for writing number in variable length format (resubmit) [#48154](https://github.com/ClickHouse/ClickHouse/pull/48154) ([Azat Khuzhin](https://github.com/azat)). +* Try fix 02151_hash_table_sizes_stats.sh test [#48178](https://github.com/ClickHouse/ClickHouse/pull/48178) ([Nikita Taranov](https://github.com/nickitat)). +* Add scripts for sparse checkout of some contribs [#48183](https://github.com/ClickHouse/ClickHouse/pull/48183) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not take lock for shared context in setTempDataOnDisk [#48219](https://github.com/ClickHouse/ClickHouse/pull/48219) ([Vladimir C](https://github.com/vdimir)). +* parseDateTime[InJodaSyntax](): Require format argument [#48222](https://github.com/ClickHouse/ClickHouse/pull/48222) ([Robert Schulze](https://github.com/rschu1ze)). +* Do not partially cancel processors added from expand pipeline. [#48231](https://github.com/ClickHouse/ClickHouse/pull/48231) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix some tests [#48267](https://github.com/ClickHouse/ClickHouse/pull/48267) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix compiling examples without Hive [#48269](https://github.com/ClickHouse/ClickHouse/pull/48269) ([Azat Khuzhin](https://github.com/azat)). +* In messages, put values into quotes [#48271](https://github.com/ClickHouse/ClickHouse/pull/48271) ([Vadim Chekan](https://github.com/vchekan)). +* Fix 01710_projection_optimize_materialize flakiness [#48276](https://github.com/ClickHouse/ClickHouse/pull/48276) ([Azat Khuzhin](https://github.com/azat)). +* Fix UB (signed integer overflow) in StorageMergeTree::backupData() [#48278](https://github.com/ClickHouse/ClickHouse/pull/48278) ([Azat Khuzhin](https://github.com/azat)). +* Update version after release [#48279](https://github.com/ClickHouse/ClickHouse/pull/48279) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.3.1.2823-lts [#48281](https://github.com/ClickHouse/ClickHouse/pull/48281) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Small follow-up to [#48017](https://github.com/ClickHouse/ClickHouse/issues/48017) [#48292](https://github.com/ClickHouse/ClickHouse/pull/48292) ([Robert Schulze](https://github.com/rschu1ze)). +* Try to update arrow library to release 11.0.0 [#48294](https://github.com/ClickHouse/ClickHouse/pull/48294) ([Kruglov Pavel](https://github.com/Avogar)). +* fix test numbers again 2 [#48295](https://github.com/ClickHouse/ClickHouse/pull/48295) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix: copy forgotten show_secrets in FormatSettings semi-copy-ctor [#48297](https://github.com/ClickHouse/ClickHouse/pull/48297) ([Natasha Murashkina](https://github.com/murfel)). +* Do not remove inputs from maybe compiled DAG. [#48303](https://github.com/ClickHouse/ClickHouse/pull/48303) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update version_date.tsv and changelogs after v22.3.20.29-lts [#48304](https://github.com/ClickHouse/ClickHouse/pull/48304) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.12.6.22-stable, v22.3.20.29-lts [#48305](https://github.com/ClickHouse/ClickHouse/pull/48305) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Merging [#46323](https://github.com/ClickHouse/ClickHouse/issues/46323) [#48312](https://github.com/ClickHouse/ClickHouse/pull/48312) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Follow-up to [#47863](https://github.com/ClickHouse/ClickHouse/issues/47863) [#48315](https://github.com/ClickHouse/ClickHouse/pull/48315) ([Alexander Tokmakov](https://github.com/tavplubix)). +* test / some complex query (it fails with analyzer enabled) [#48324](https://github.com/ClickHouse/ClickHouse/pull/48324) ([Denny Crane](https://github.com/den-crane)). +* Fix constraints after merge [#48328](https://github.com/ClickHouse/ClickHouse/pull/48328) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add logging for concurrency checks for backups [#48337](https://github.com/ClickHouse/ClickHouse/pull/48337) ([Vitaly Baranov](https://github.com/vitlibar)). +* Update version_date.tsv and changelogs after v23.1.6.42-stable [#48345](https://github.com/ClickHouse/ClickHouse/pull/48345) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.2.5.46-stable [#48346](https://github.com/ClickHouse/ClickHouse/pull/48346) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix lambda type resolution [#48355](https://github.com/ClickHouse/ClickHouse/pull/48355) ([Dmitry Novik](https://github.com/novikd)). +* Avoid abort in protobuf library in debug build [#48356](https://github.com/ClickHouse/ClickHouse/pull/48356) ([Kruglov Pavel](https://github.com/Avogar)). +* Batch fix for projections analysis with analyzer. [#48357](https://github.com/ClickHouse/ClickHouse/pull/48357) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix tests with explain and analyzer where names changed. [#48360](https://github.com/ClickHouse/ClickHouse/pull/48360) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Small follow-up to [#45912](https://github.com/ClickHouse/ClickHouse/issues/45912) [#48373](https://github.com/ClickHouse/ClickHouse/pull/48373) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v22.8.16.32-lts [#48376](https://github.com/ClickHouse/ClickHouse/pull/48376) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Add script for a slack bot that reports broken tests [#48382](https://github.com/ClickHouse/ClickHouse/pull/48382) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky `test_keeper_mntr_data_size` [#48384](https://github.com/ClickHouse/ClickHouse/pull/48384) ([Antonio Andelic](https://github.com/antonio2368)). +* WITH FILL clarification and cleanup [#48395](https://github.com/ClickHouse/ClickHouse/pull/48395) ([Igor Nikonov](https://github.com/devcrafter)). +* Cleanup mess in .clang-tidy [#48396](https://github.com/ClickHouse/ClickHouse/pull/48396) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix test_backup_all [#48400](https://github.com/ClickHouse/ClickHouse/pull/48400) ([Vitaly Baranov](https://github.com/vitlibar)). +* Find big allocations without memory limits checks [#48401](https://github.com/ClickHouse/ClickHouse/pull/48401) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix issue with krb5 and building w/ OpenSSL [#48407](https://github.com/ClickHouse/ClickHouse/pull/48407) ([Boris Kuschel](https://github.com/bkuschel)). +* Make CI slack bot less noisy [#48409](https://github.com/ClickHouse/ClickHouse/pull/48409) ([Alexander Tokmakov](https://github.com/tavplubix)). +* AST fuzzer: Fix assertion in TopK serialization [#48412](https://github.com/ClickHouse/ClickHouse/pull/48412) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix possible flakiness of lightweight delete tests (due to index granularity randomization) [#48413](https://github.com/ClickHouse/ClickHouse/pull/48413) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky `test_keeper_snapshots` [#48417](https://github.com/ClickHouse/ClickHouse/pull/48417) ([Antonio Andelic](https://github.com/antonio2368)). +* Update sort desc: more efficient original node search in ActionsDAG [#48427](https://github.com/ClickHouse/ClickHouse/pull/48427) ([Igor Nikonov](https://github.com/devcrafter)). +* test for [#16399](https://github.com/ClickHouse/ClickHouse/issues/16399) [#48439](https://github.com/ClickHouse/ClickHouse/pull/48439) ([Denny Crane](https://github.com/den-crane)). +* Better exception messages from Keeper client [#48444](https://github.com/ClickHouse/ClickHouse/pull/48444) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small documentation follow-up to [#47246](https://github.com/ClickHouse/ClickHouse/issues/47246) [#48463](https://github.com/ClickHouse/ClickHouse/pull/48463) ([Robert Schulze](https://github.com/rschu1ze)). +* Update 00002_log_and_exception_messages_formatting.sql [#48467](https://github.com/ClickHouse/ClickHouse/pull/48467) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Avoid operation on uninitialised data in readDateTimeTextImpl [#48472](https://github.com/ClickHouse/ClickHouse/pull/48472) ([Kruglov Pavel](https://github.com/Avogar)). +* Add reading step for system zookeeper. Analyze path from filter DAG. [#48485](https://github.com/ClickHouse/ClickHouse/pull/48485) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix deadlock due to debug tracking of memory allocations [#48487](https://github.com/ClickHouse/ClickHouse/pull/48487) ([Azat Khuzhin](https://github.com/azat)). +* Register datediff and trim aliases in system.functions [#48489](https://github.com/ClickHouse/ClickHouse/pull/48489) ([Robert Schulze](https://github.com/rschu1ze)). +* Change error code [#48490](https://github.com/ClickHouse/ClickHouse/pull/48490) ([Anton Popov](https://github.com/CurtizJ)). 
+* Update 00002_log_and_exception_messages_formatting.sql [#48499](https://github.com/ClickHouse/ClickHouse/pull/48499) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix query cache with sparse columns [#48500](https://github.com/ClickHouse/ClickHouse/pull/48500) ([Anton Popov](https://github.com/CurtizJ)). +* Use std::string_view to get rid of strlen [#48509](https://github.com/ClickHouse/ClickHouse/pull/48509) ([ltrk2](https://github.com/ltrk2)). +* Fix bytesSize() of zk SetRequest [#48512](https://github.com/ClickHouse/ClickHouse/pull/48512) ([Sergei Trifonov](https://github.com/serxa)). +* Remove dead code and unused dependencies [#48518](https://github.com/ClickHouse/ClickHouse/pull/48518) ([ltrk2](https://github.com/ltrk2)). +* Use forward declaration of ThreadPool [#48519](https://github.com/ClickHouse/ClickHouse/pull/48519) ([Azat Khuzhin](https://github.com/azat)). +* Use std::string_view instead of strlen [#48520](https://github.com/ClickHouse/ClickHouse/pull/48520) ([ltrk2](https://github.com/ltrk2)). +* Use std::string::starts_with instead of a roll your own variant [#48521](https://github.com/ClickHouse/ClickHouse/pull/48521) ([ltrk2](https://github.com/ltrk2)). +* Fix flaky `test_alternative_keeper_config` [#48533](https://github.com/ClickHouse/ClickHouse/pull/48533) ([Antonio Andelic](https://github.com/antonio2368)). +* Use one ThreadGroup while pushing to materialized views (and some refactoring for ThreadGroup) [#48543](https://github.com/ClickHouse/ClickHouse/pull/48543) ([Azat Khuzhin](https://github.com/azat)). +* Fix some tests [#48550](https://github.com/ClickHouse/ClickHouse/pull/48550) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix 02477_projection_materialize_and_zero_copy flakiness (due to index granularity randomization) [#48551](https://github.com/ClickHouse/ClickHouse/pull/48551) ([Azat Khuzhin](https://github.com/azat)). +* Better exception message for ZSTD [#48552](https://github.com/ClickHouse/ClickHouse/pull/48552) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove misleading comment and block [#48562](https://github.com/ClickHouse/ClickHouse/pull/48562) ([Sergei Trifonov](https://github.com/serxa)). +* Update 02207_allow_plaintext_and_no_password.sh [#48566](https://github.com/ClickHouse/ClickHouse/pull/48566) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* bugfix: compare Bits and sizeof(Arithmetic) * 8 [#48569](https://github.com/ClickHouse/ClickHouse/pull/48569) ([caipengxiang](https://github.com/awfeequdng)). +* Remove superfluous includes of logger_useful.h from headers [#48570](https://github.com/ClickHouse/ClickHouse/pull/48570) ([Azat Khuzhin](https://github.com/azat)). +* Remove slow test from debug builds [#48574](https://github.com/ClickHouse/ClickHouse/pull/48574) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Don't use type conversion with String query parameters [#48577](https://github.com/ClickHouse/ClickHouse/pull/48577) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix TSan report in Kerberos [#48579](https://github.com/ClickHouse/ClickHouse/pull/48579) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add second_deadlock_stack=1 for TSan on CI and fix some lock-order-inversion problems [#48596](https://github.com/ClickHouse/ClickHouse/pull/48596) ([Azat Khuzhin](https://github.com/azat)). 
+* Fix LOGICAL_ERROR in executable table function [#48605](https://github.com/ClickHouse/ClickHouse/pull/48605) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix flakiness of test_store_cleanup in case of image rebuild [#48610](https://github.com/ClickHouse/ClickHouse/pull/48610) ([Azat Khuzhin](https://github.com/azat)). +* Remove strange code [#48612](https://github.com/ClickHouse/ClickHouse/pull/48612) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Minor refactoring of formatDateTime() [#48627](https://github.com/ClickHouse/ClickHouse/pull/48627) ([Robert Schulze](https://github.com/rschu1ze)). +* Better handling of values too large for VarInt encoding [#48628](https://github.com/ClickHouse/ClickHouse/pull/48628) ([Robert Schulze](https://github.com/rschu1ze)). +* refine some messages of exception in regexp tree [#48632](https://github.com/ClickHouse/ClickHouse/pull/48632) ([Han Fei](https://github.com/hanfei1991)). +* Partially revert e0252db8d and fix pr-bugfix labeling [#48637](https://github.com/ClickHouse/ClickHouse/pull/48637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix build src/Interpreters/InterpreterInsertQuery.h [#48638](https://github.com/ClickHouse/ClickHouse/pull/48638) ([Vladimir C](https://github.com/vdimir)). +* Fix build ThreadGroupPtr [#48641](https://github.com/ClickHouse/ClickHouse/pull/48641) ([Vladimir C](https://github.com/vdimir)). +* Fix flaky test test_drop_replica_and_achieve_quorum [#48642](https://github.com/ClickHouse/ClickHouse/pull/48642) ([Kruglov Pavel](https://github.com/Avogar)). +* fix 02504_regexp_dictionary_table_source [#48662](https://github.com/ClickHouse/ClickHouse/pull/48662) ([Han Fei](https://github.com/hanfei1991)). +* Remove strange code from MutateTask [#48666](https://github.com/ClickHouse/ClickHouse/pull/48666) ([alesapin](https://github.com/alesapin)). +* SonarCloud: C++ Reporting Standards [#48668](https://github.com/ClickHouse/ClickHouse/pull/48668) ([Julio Jimenez](https://github.com/juliojimenez)). +* Remove lock for duplicated parts UUIDs (allow_experimental_query_deduplication=1) [#48670](https://github.com/ClickHouse/ClickHouse/pull/48670) ([Azat Khuzhin](https://github.com/azat)). +* show result of minio listings for test test_attach_detach_partition [#48674](https://github.com/ClickHouse/ClickHouse/pull/48674) ([Sema Checherinda](https://github.com/CheSema)). +* Fix tests for analyzer [#48675](https://github.com/ClickHouse/ClickHouse/pull/48675) ([Igor Nikonov](https://github.com/devcrafter)). +* Call IProcessor::onCancel() once [#48687](https://github.com/ClickHouse/ClickHouse/pull/48687) ([Igor Nikonov](https://github.com/devcrafter)). +* Update MergeTree syntax for optional index granularity argument [#48692](https://github.com/ClickHouse/ClickHouse/pull/48692) ([Robert Schulze](https://github.com/rschu1ze)). +* Add test for old bug [#7826](https://github.com/ClickHouse/ClickHouse/issues/7826) [#48697](https://github.com/ClickHouse/ClickHouse/pull/48697) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix flaky `test_keeper_session` [#48699](https://github.com/ClickHouse/ClickHouse/pull/48699) ([Antonio Andelic](https://github.com/antonio2368)). +* Better messages formatting in the CI Slack bot [#48712](https://github.com/ClickHouse/ClickHouse/pull/48712) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add trusted contributors [#48715](https://github.com/ClickHouse/ClickHouse/pull/48715) ([Aleksei Filatov](https://github.com/aalexfvk)). 
+* Do not remove broken detached parts on startup [#48730](https://github.com/ClickHouse/ClickHouse/pull/48730) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove `-Wshadow` suppression which leaked into global namespace [#48737](https://github.com/ClickHouse/ClickHouse/pull/48737) ([Robert Schulze](https://github.com/rschu1ze)). +* VarInt coding: Always perform sanity check [#48740](https://github.com/ClickHouse/ClickHouse/pull/48740) ([Robert Schulze](https://github.com/rschu1ze)). +* Try to fix flaky 02455_one_row_from_csv_memory_usage [#48756](https://github.com/ClickHouse/ClickHouse/pull/48756) ([Dmitry Novik](https://github.com/novikd)). +* Insert UInt32 hash value in reverse order on big endian machine [#48764](https://github.com/ClickHouse/ClickHouse/pull/48764) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Limit size of messages from the CI slack bot [#48766](https://github.com/ClickHouse/ClickHouse/pull/48766) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update README.md [#48776](https://github.com/ClickHouse/ClickHouse/pull/48776) ([Tyler Hannan](https://github.com/tylerhannan)). +* Remove duplicate definition of SingleEndpointHTTPSessionPool [#48779](https://github.com/ClickHouse/ClickHouse/pull/48779) ([JaySon](https://github.com/JaySon-Huang)). +* Fix flaky test_version_update_after_mutation/test.py::test_upgrade_while_mutation [#48783](https://github.com/ClickHouse/ClickHouse/pull/48783) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix flaky test test_backup_all [#48789](https://github.com/ClickHouse/ClickHouse/pull/48789) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix a confusing warning about interserver mode [#48793](https://github.com/ClickHouse/ClickHouse/pull/48793) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Store clusters from ClusterDiscovery in separate map [#48795](https://github.com/ClickHouse/ClickHouse/pull/48795) ([Vladimir C](https://github.com/vdimir)). +* Reimplement [#48790](https://github.com/ClickHouse/ClickHouse/issues/48790) [#48797](https://github.com/ClickHouse/ClickHouse/pull/48797) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow running integration tests without spark [#48803](https://github.com/ClickHouse/ClickHouse/pull/48803) ([Vitaly Baranov](https://github.com/vitlibar)). +* Forbid gwpsan in debug mode to rescue stress tests [#48804](https://github.com/ClickHouse/ClickHouse/pull/48804) ([Han Fei](https://github.com/hanfei1991)). +* Simplify FileCacheFactory [#48805](https://github.com/ClickHouse/ClickHouse/pull/48805) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix double whitespace in exception message [#48815](https://github.com/ClickHouse/ClickHouse/pull/48815) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#38128](https://github.com/ClickHouse/ClickHouse/issues/38128) [#48817](https://github.com/ClickHouse/ClickHouse/pull/48817) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove excessive logging [#48826](https://github.com/ClickHouse/ClickHouse/pull/48826) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove duplicate IndentWidth in clang-format [#48834](https://github.com/ClickHouse/ClickHouse/pull/48834) ([cluster](https://github.com/infdahai)). +* Try to fix flaky test_concurrent_alter_move_and_drop [#48843](https://github.com/ClickHouse/ClickHouse/pull/48843) ([Sergei Trifonov](https://github.com/serxa)). 
+* Fix the race when waiting for loading parts [#48844](https://github.com/ClickHouse/ClickHouse/pull/48844) ([Sema Checherinda](https://github.com/CheSema)). +* Suppress assert of progress for test_system_replicated_fetches [#48856](https://github.com/ClickHouse/ClickHouse/pull/48856) ([Han Fei](https://github.com/hanfei1991)). +* Fix: do not run test_store_cleanup_disk_s3 in parallel [#48863](https://github.com/ClickHouse/ClickHouse/pull/48863) ([Igor Nikonov](https://github.com/devcrafter)). +* Update README.md [#48883](https://github.com/ClickHouse/ClickHouse/pull/48883) ([Tyler Hannan](https://github.com/tylerhannan)). +* Fix test reference files for join using nullable column [#48893](https://github.com/ClickHouse/ClickHouse/pull/48893) ([Vladimir C](https://github.com/vdimir)). +* bitNot marked as NO_SANITIZE_UNDEFINED [#48899](https://github.com/ClickHouse/ClickHouse/pull/48899) ([Vladimir C](https://github.com/vdimir)). +* Fix order by in test_storage_delta [#48903](https://github.com/ClickHouse/ClickHouse/pull/48903) ([Vladimir C](https://github.com/vdimir)). +* Fix segfault when set is not built yet [#48904](https://github.com/ClickHouse/ClickHouse/pull/48904) ([Alexander Gololobov](https://github.com/davenger)). +* A non-significant change (does not affect anything): add support for signed integers in the maskBits function [#48920](https://github.com/ClickHouse/ClickHouse/pull/48920) ([caipengxiang](https://github.com/awfeequdng)). +* Follow-up to [#48866](https://github.com/ClickHouse/ClickHouse/issues/48866) [#48929](https://github.com/ClickHouse/ClickHouse/pull/48929) ([Robert Schulze](https://github.com/rschu1ze)). +* Un-flake 01079_new_range_reader_segfault [#48934](https://github.com/ClickHouse/ClickHouse/pull/48934) ([Robert Schulze](https://github.com/rschu1ze)). +* Add building stage to the fasttests report, respect existing status on rerun [#48935](https://github.com/ClickHouse/ClickHouse/pull/48935) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update Settings.h [#48948](https://github.com/ClickHouse/ClickHouse/pull/48948) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update cluster.py [#48949](https://github.com/ClickHouse/ClickHouse/pull/48949) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Docs: Replace annoying three spaces in enumerations by a single space [#48951](https://github.com/ClickHouse/ClickHouse/pull/48951) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix flaky 02706_arrow_different_dictionaries [#48952](https://github.com/ClickHouse/ClickHouse/pull/48952) ([Kruglov Pavel](https://github.com/Avogar)). +* Use default `{replica}`, `{shard}` arguments in Replicated engine [#48961](https://github.com/ClickHouse/ClickHouse/pull/48961) ([Nikolay Degterinsky](https://github.com/evillique)). +* Rename quantileApprox -> quantileGK [#48969](https://github.com/ClickHouse/ClickHouse/pull/48969) ([Vladimir C](https://github.com/vdimir)). +* Don't throw logical error when column is not found in Parquet/Arrow schema [#48987](https://github.com/ClickHouse/ClickHouse/pull/48987) ([Kruglov Pavel](https://github.com/Avogar)). +* Reimplement [#48986](https://github.com/ClickHouse/ClickHouse/issues/48986) [#49005](https://github.com/ClickHouse/ClickHouse/pull/49005) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Don't allow bad changelogs [#49006](https://github.com/ClickHouse/ClickHouse/pull/49006) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Update README.md [#49007](https://github.com/ClickHouse/ClickHouse/pull/49007) ([Nick-71](https://github.com/Nick-71)). +* Remove outdated test [#49014](https://github.com/ClickHouse/ClickHouse/pull/49014) ([alesapin](https://github.com/alesapin)). +* Fix typo [#49027](https://github.com/ClickHouse/ClickHouse/pull/49027) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix assertion after [#48636](https://github.com/ClickHouse/ClickHouse/issues/48636) [#49029](https://github.com/ClickHouse/ClickHouse/pull/49029) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix build error for big-endian platforms [#49037](https://github.com/ClickHouse/ClickHouse/pull/49037) ([ltrk2](https://github.com/ltrk2)). +* Update version_date.tsv and changelogs after v22.8.17.17-lts [#49046](https://github.com/ClickHouse/ClickHouse/pull/49046) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.1.7.30-stable [#49047](https://github.com/ClickHouse/ClickHouse/pull/49047) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.2.37-lts [#49048](https://github.com/ClickHouse/ClickHouse/pull/49048) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Remove some code [#49054](https://github.com/ClickHouse/ClickHouse/pull/49054) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove some dead code in poco [#49075](https://github.com/ClickHouse/ClickHouse/pull/49075) ([Robert Schulze](https://github.com/rschu1ze)). +* Prevent false positive report by static analyzer [#49078](https://github.com/ClickHouse/ClickHouse/pull/49078) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.2.6.34-stable [#49080](https://github.com/ClickHouse/ClickHouse/pull/49080) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Enforce documentation change for a new-feature PR [#49090](https://github.com/ClickHouse/ClickHouse/pull/49090) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update clickhouse-test [#49094](https://github.com/ClickHouse/ClickHouse/pull/49094) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable long 02581 in debug, enable with sanitizers [#49105](https://github.com/ClickHouse/ClickHouse/pull/49105) ([Alexander Gololobov](https://github.com/davenger)). +* Fix flaky integration test test_async_query_sending [#49107](https://github.com/ClickHouse/ClickHouse/pull/49107) ([Kruglov Pavel](https://github.com/Avogar)). +* Correct functional test to reflect interoperability [#49108](https://github.com/ClickHouse/ClickHouse/pull/49108) ([ltrk2](https://github.com/ltrk2)). +* Cleanup build guide [#49119](https://github.com/ClickHouse/ClickHouse/pull/49119) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix building iceberg without avro [#49125](https://github.com/ClickHouse/ClickHouse/pull/49125) ([Azat Khuzhin](https://github.com/azat)). +* Add slash for close tag of user_defined_zookeeper_path [#49131](https://github.com/ClickHouse/ClickHouse/pull/49131) ([Hollin](https://github.com/Hooollin)). +* Improve some lambdas [#49133](https://github.com/ClickHouse/ClickHouse/pull/49133) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Do not randomize prefetch settings for debug build [#49134](https://github.com/ClickHouse/ClickHouse/pull/49134) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Don't throw LOGICAL_ERROR when reading from remote if there is no local replica [#49136](https://github.com/ClickHouse/ClickHouse/pull/49136) ([Raúl Marín](https://github.com/Algunenano)). +* Docs: Make caption of processors_profile_log page consistent with other pages [#49138](https://github.com/ClickHouse/ClickHouse/pull/49138) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve test reports [#49151](https://github.com/ClickHouse/ClickHouse/pull/49151) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add a note regarding private/public repo to logs [#49152](https://github.com/ClickHouse/ClickHouse/pull/49152) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Suppress two timeout tests [#49175](https://github.com/ClickHouse/ClickHouse/pull/49175) ([Han Fei](https://github.com/hanfei1991)). +* Document makeDateTime() and its variants [#49183](https://github.com/ClickHouse/ClickHouse/pull/49183) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix after [#49110](https://github.com/ClickHouse/ClickHouse/issues/49110) [#49206](https://github.com/ClickHouse/ClickHouse/pull/49206) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/en/development/architecture.md b/docs/en/development/architecture.md index dd46b294ddd..da7f95ac207 100644 --- a/docs/en/development/architecture.md +++ b/docs/en/development/architecture.md @@ -79,8 +79,8 @@ In most cases, the read method is only responsible for reading the specified col But there are notable exceptions: -- The AST query is passed to the `read` method, and the table engine can use it to derive index usage and to read fewer data from a table. -- Sometimes the table engine can process data itself to a specific stage. For example, `StorageDistributed` can send a query to remote servers, ask them to process data to a stage where data from different remote servers can be merged, and return that preprocessed data. The query interpreter then finishes processing the data. +- The AST query is passed to the `read` method, and the table engine can use it to derive index usage and to read less data from a table. +- Sometimes the table engine can process data itself to a specific stage. For example, `StorageDistributed` can send a query to remote servers, ask them to process data to a stage where data from different remote servers can be merged, and return that preprocessed data. The query interpreter then finishes processing the data. The table’s `read` method can return multiple `IBlockInputStream` objects to allow parallel data processing. These multiple block input streams can read from a table in parallel. Then you can wrap these streams with various transformations (such as expression evaluation or filtering) that can be calculated independently and create a `UnionBlockInputStream` on top of them, to read from multiple streams in parallel. @@ -132,9 +132,9 @@ Aggregation states can be serialized and deserialized to pass over the network d The server implements several different interfaces: -- An HTTP interface for any foreign clients. -- A TCP interface for the native ClickHouse client and for cross-server communication during distributed query execution. -- An interface for transferring data for replication. +- An HTTP interface for any foreign clients. +- A TCP interface for the native ClickHouse client and for cross-server communication during distributed query execution. +- An interface for transferring data for replication. 
Internally, it is just a primitive multithread server without coroutines or fibers. Since the server is not designed to process a high rate of simple queries but to process a relatively low rate of complex queries, each of them can process a vast amount of data for analytics. diff --git a/docs/en/development/build-cross-s390x.md b/docs/en/development/build-cross-s390x.md index bfb94ef7ed0..be2c37f5f41 100644 --- a/docs/en/development/build-cross-s390x.md +++ b/docs/en/development/build-cross-s390x.md @@ -90,7 +90,7 @@ Process 1 stopped ## Visual Studio Code integration -- (CodeLLDB extension)[https://github.com/vadimcn/vscode-lldb] is required for visual debugging, the (Command Variable)[https://github.com/rioj7/command-variable] extension can help dynamic launches if using (cmake variants)[https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md]. +- [CodeLLDB extension](https://github.com/vadimcn/vscode-lldb) is required for visual debugging; the [Command Variable](https://github.com/rioj7/command-variable) extension can help with dynamic launches if using [cmake variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md). - Make sure to set the backend to your llvm installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"` - Launcher: ```json diff --git a/docs/en/development/build-osx.md b/docs/en/development/build-osx.md index 63d1905bb61..e65de4a37e0 100644 --- a/docs/en/development/build-osx.md +++ b/docs/en/development/build-osx.md @@ -13,7 +13,7 @@ You can install pre-built ClickHouse as described in [Quick Start](https://click The build works on x86_64 (Intel) and arm64 (Apple Silicon) based on macOS 10.15 (Catalina) or higher with Homebrew's vanilla Clang. :::note -It is also possible to compile with Apple's XCode `apple-clang` or Homebrew's `gcc`, but it's strongly discouraged. +It is also possible to compile with Apple's Xcode `apple-clang`, but it's strongly discouraged. ::: ## Install Homebrew {#install-homebrew} @@ -75,20 +75,6 @@ cmake --open . # The resulting binary will be created at: ./programs/Debug/clickhouse ``` -To build using Homebrew's vanilla GCC compiler (this option is only for development experiments, and is **absolutely not recommended** unless you really know what you are doing): - -``` bash -cd ClickHouse -mkdir build -export PATH=$(brew --prefix binutils)/bin:$PATH -export PATH=$(brew --prefix gcc)/bin:$PATH -export CC=$(brew --prefix gcc)/bin/gcc-11 -export CXX=$(brew --prefix gcc)/bin/g++-11 -cmake -G Ninja -DCMAKE_BUILD_TYPE=RelWithDebInfo -S . -B build -cmake --build build -# The resulting binary will be created at: build/programs/clickhouse -``` - ## Caveats {#caveats} If you intend to run `clickhouse-server`, make sure to increase the system’s `maxfiles` variable. diff --git a/docs/en/development/build.md b/docs/en/development/build.md index 00a8a54f80a..e3a63da6a3e 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -9,13 +9,15 @@ description: How to build ClickHouse on Linux Supported platforms: -- x86_64 -- AArch64 -- Power9 (experimental) +- x86_64 +- AArch64 +- Power9 (experimental) -## Normal Build for Development on Ubuntu +## Building on Ubuntu -The following tutorial is based on the Ubuntu Linux system. With appropriate changes, it should also work on any other Linux distribution. +The following tutorial is based on Ubuntu Linux. +With appropriate changes, it should also work on any other Linux distribution. 
+The minimum recommended Ubuntu version for development is 22.04 LTS. ### Install Prerequisites {#install-prerequisites} ``` bash sudo apt-get install git cmake ccache python3 ninja-build yasm gawk ``` -Or cmake3 instead of cmake on older systems. +### Install and Use the Clang compiler -### Install the latest clang (recommended) +On Ubuntu/Debian you can use LLVM's automatic installation script, see [here](https://apt.llvm.org/). -On Ubuntu/Debian you can use the automatic installation script (check [official webpage](https://apt.llvm.org/)) - -```bash +``` bash sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` @@ -40,19 +40,17 @@ sudo apt-get install software-properties-common sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test ``` -For other Linux distribution - check the availability of the [prebuild packages](https://releases.llvm.org/download.html) or build clang [from sources](https://clang.llvm.org/get_started.html). +For other Linux distributions, check the availability of LLVM's [prebuilt packages](https://releases.llvm.org/download.html). -#### Use the latest clang for Builds +As of April 2023, any version of Clang >= 15 will work. +GCC as a compiler is not supported. +To build with a specific Clang version: ``` bash export CC=clang-15 export CXX=clang++-15 ``` -In this example we use version 15 that is the latest as of Sept 2022. - -Gcc cannot be used. - ### Checkout ClickHouse Sources {#checkout-clickhouse-sources} ``` bash git clone --recursive git@github.com:ClickHouse/ClickHouse.git or: git clone --recursive --shallow-submodules https://github.com/ClickHouse/ClickHouse.git ``` ``` bash cd ClickHouse mkdir build -cd build -cmake .. -ninja +cmake -S . -B build +cmake --build build # or: `cd build; ninja` ``` -To create an executable, run `ninja clickhouse`. -This will create the `programs/clickhouse` executable, which can be used with `client` or `server` arguments. +To create an executable, run `cmake --build build --target clickhouse` (or: `cd build; ninja clickhouse`). +This will create the executable `build/programs/clickhouse`, which can be used with `client` or `server` arguments. -## How to Build ClickHouse on Any Linux {#how-to-build-clickhouse-on-any-linux} +## Building on Any Linux {#how-to-build-clickhouse-on-any-linux} The build requires the following components: -- Git (is used only to checkout the sources, it’s not needed for the build) -- CMake 3.15 or newer -- Ninja -- C++ compiler: clang-15 or newer -- Linker: lld -- Yasm -- Gawk +- Git (used to check out the sources, not needed for the build) +- CMake 3.20 or newer +- Compiler: Clang 15 or newer +- Linker: lld 15 or newer +- Ninja +- Yasm +- Gawk If all the components are installed, you may build in the same way as the steps above. -Example for Ubuntu Eoan: -``` bash -sudo apt update -sudo apt install git cmake ninja-build clang++ python yasm gawk -git clone --recursive https://github.com/ClickHouse/ClickHouse.git -mkdir build && cd build -cmake ../ClickHouse -ninja -``` - Example for OpenSUSE Tumbleweed: + ``` bash sudo zypper install git cmake ninja clang-c++ python lld yasm gawk git clone --recursive https://github.com/ClickHouse/ClickHouse.git -mkdir build && cd build -cmake ../ClickHouse -ninja +cd ClickHouse +mkdir build +cmake -S . -B build 
+cmake --build build ``` Example for Fedora Rawhide: + ``` bash sudo yum update sudo yum --nogpg install git cmake make clang python3 ccache yasm gawk git clone --recursive https://github.com/ClickHouse/ClickHouse.git -mkdir build && cd build -cmake ../ClickHouse -make -j $(nproc) -``` - -Here is an example of how to build `clang` and all the llvm infrastructure from sources: - -``` -git clone git@github.com:llvm/llvm-project.git -mkdir llvm-build && cd llvm-build -cmake -DCMAKE_BUILD_TYPE:STRING=Release -DLLVM_ENABLE_PROJECTS=all ../llvm-project/llvm/ -make -j16 -sudo make install -hash clang -clang --version -``` - -You can install the older clang like clang-11 from packages and then use it to build the new clang from sources. - -Here is an example of how to install the new `cmake` from the official website: - -``` -wget https://github.com/Kitware/CMake/releases/download/v3.22.2/cmake-3.22.2-linux-x86_64.sh -chmod +x cmake-3.22.2-linux-x86_64.sh -./cmake-3.22.2-linux-x86_64.sh -export PATH=/home/milovidov/work/cmake-3.22.2-linux-x86_64/bin/:${PATH} -hash cmake +cd ClickHouse +mkdir build +cmake -S . -B build +cmake --build build ``` ## You Don’t Have to Build ClickHouse {#you-dont-have-to-build-clickhouse} diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index ace5ab79bb4..6bcdadeb1eb 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -39,9 +39,15 @@ Next, you need to download the source files onto your working machine. This is c In the command line terminal run: - git clone --recursive --shallow-submodules git@github.com:your_github_username/ClickHouse.git + git clone --shallow-submodules git@github.com:your_github_username/ClickHouse.git cd ClickHouse +Or (if you'd like to use sparse checkout for submodules and avoid checking out unneeded files): + + git clone git@github.com:your_github_username/ClickHouse.git + cd ClickHouse + ./contrib/update-submodules.sh + Note: please, substitute *your_github_username* with what is appropriate! This command will create a directory `ClickHouse` containing the working copy of the project. diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 78b1d1e9ebb..5b03468623d 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -370,8 +370,8 @@ If the file contains a single function, name the file the same way as the functi **11.** If the name contains an abbreviation, then: -- For variable names, the abbreviation should use lowercase letters `mysql_connection` (not `mySQL_connection`). -- For names of classes and functions, keep the uppercase letters in the abbreviation`MySQLConnection` (not `MySqlConnection`). +- For variable names, the abbreviation should use lowercase letters `mysql_connection` (not `mySQL_connection`). +- For names of classes and functions, keep the uppercase letters in the abbreviation`MySQLConnection` (not `MySqlConnection`). **12.** Constructor arguments that are used just to initialize the class members should be named the same way as the class members, but with an underscore at the end. @@ -434,9 +434,9 @@ In application code, memory must be freed by the object that owns it. Examples: -- The easiest way is to place an object on the stack, or make it a member of another class. -- For a large number of small objects, use containers. -- For automatic deallocation of a small number of objects that reside in the heap, use `shared_ptr/unique_ptr`. 
+- The easiest way is to place an object on the stack, or make it a member of another class. +- For a large number of small objects, use containers. +- For automatic deallocation of a small number of objects that reside in the heap, use `shared_ptr/unique_ptr`. **2.** Resource management. @@ -504,10 +504,10 @@ This is not recommended, but it is allowed. Use the following options: -- Create a function (`done()` or `finalize()`) that will do all the work in advance that might lead to an exception. If that function was called, there should be no exceptions in the destructor later. -- Tasks that are too complex (such as sending messages over the network) can be put in separate method that the class user will have to call before destruction. -- If there is an exception in the destructor, it’s better to log it than to hide it (if the logger is available). -- In simple applications, it is acceptable to rely on `std::terminate` (for cases of `noexcept` by default in C++11) to handle exceptions. +- Create a function (`done()` or `finalize()`) that will do all the work in advance that might lead to an exception. If that function was called, there should be no exceptions in the destructor later. +- Tasks that are too complex (such as sending messages over the network) can be put in separate method that the class user will have to call before destruction. +- If there is an exception in the destructor, it’s better to log it than to hide it (if the logger is available). +- In simple applications, it is acceptable to rely on `std::terminate` (for cases of `noexcept` by default in C++11) to handle exceptions. **6.** Anonymous code blocks. @@ -529,11 +529,11 @@ ready_any.set(); In offline data processing programs: -- Try to get the best possible performance on a single CPU core. You can then parallelize your code if necessary. +- Try to get the best possible performance on a single CPU core. You can then parallelize your code if necessary. In server applications: -- Use the thread pool to process requests. At this point, we haven’t had any tasks that required userspace context switching. +- Use the thread pool to process requests. At this point, we haven’t had any tasks that required userspace context switching. Fork is not used for parallelization. diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 98dbe5f8d57..1d3e7d4964e 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -194,11 +194,11 @@ If the system clickhouse-server is already running and you do not want to stop i Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. These tests are automated as well. Examples: -- cross-compile for Darwin x86_64 (macOS) -- cross-compile for FreeBSD x86_64 -- cross-compile for Linux AArch64 -- build on Ubuntu with libraries from system packages (discouraged) -- build with shared linking of libraries (discouraged) +- cross-compile for Darwin x86_64 (macOS) +- cross-compile for FreeBSD x86_64 +- cross-compile for Linux AArch64 +- build on Ubuntu with libraries from system packages (discouraged) +- build with shared linking of libraries (discouraged) For example, build with system packages is bad practice, because we cannot guarantee what exact version of packages a system will have. But this is really needed by Debian maintainers. For this reason we at least have to support this variant of build. Another example: shared linking is a common source of trouble, but it is needed for some enthusiasts. 
diff --git a/docs/en/engines/database-engines/atomic.md b/docs/en/engines/database-engines/atomic.md index 3ea5008c80a..0224e1aba21 100644 --- a/docs/en/engines/database-engines/atomic.md +++ b/docs/en/engines/database-engines/atomic.md @@ -59,4 +59,4 @@ For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table ## See Also -- [system.databases](../../operations/system-tables/databases.md) system table +- [system.databases](../../operations/system-tables/databases.md) system table diff --git a/docs/en/engines/database-engines/index.md b/docs/en/engines/database-engines/index.md index 835383f503f..233cbbb4247 100644 --- a/docs/en/engines/database-engines/index.md +++ b/docs/en/engines/database-engines/index.md @@ -11,18 +11,18 @@ Database engines allow you to work with tables. By default, ClickHouse uses the Here is a complete list of available database engines. Follow the links for more details: -- [Atomic](../../engines/database-engines/atomic.md) +- [Atomic](../../engines/database-engines/atomic.md) -- [MySQL](../../engines/database-engines/mysql.md) +- [MySQL](../../engines/database-engines/mysql.md) -- [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) +- [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) -- [Lazy](../../engines/database-engines/lazy.md) +- [Lazy](../../engines/database-engines/lazy.md) -- [PostgreSQL](../../engines/database-engines/postgresql.md) +- [PostgreSQL](../../engines/database-engines/postgresql.md) -- [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) +- [MaterializedPostgreSQL](../../engines/database-engines/materialized-postgresql.md) -- [Replicated](../../engines/database-engines/replicated.md) +- [Replicated](../../engines/database-engines/replicated.md) -- [SQLite](../../engines/database-engines/sqlite.md) +- [SQLite](../../engines/database-engines/sqlite.md) diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index 45719b1340e..1f1c996d4bf 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -22,10 +22,10 @@ ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'passwo **Engine Parameters** -- `host:port` — MySQL server endpoint. -- `database` — MySQL database name. -- `user` — MySQL user. -- `password` — User password. +- `host:port` — MySQL server endpoint. +- `database` — MySQL database name. +- `user` — MySQL user. +- `password` — User password. ## Engine Settings diff --git a/docs/en/engines/database-engines/materialized-postgresql.md b/docs/en/engines/database-engines/materialized-postgresql.md index 32e3435afa2..08e9f998626 100644 --- a/docs/en/engines/database-engines/materialized-postgresql.md +++ b/docs/en/engines/database-engines/materialized-postgresql.md @@ -26,10 +26,10 @@ ENGINE = MaterializedPostgreSQL('host:port', 'database', 'user', 'password') [SE **Engine Parameters** -- `host:port` — PostgreSQL server endpoint. -- `database` — PostgreSQL database name. -- `user` — PostgreSQL user. -- `password` — User password. +- `host:port` — PostgreSQL server endpoint. +- `database` — PostgreSQL database name. +- `user` — PostgreSQL user. +- `password` — User password. ## Example of Use {#example-of-use} @@ -120,9 +120,9 @@ Warning: for this case dots in table name are not allowed. 2. 
Each replicated table must have one of the following [replica identity](https://www.postgresql.org/docs/10/sql-altertable.html#SQL-CREATETABLE-REPLICA-IDENTITY): -- primary key (by default) +- primary key (by default) -- index +- index ``` bash postgres# CREATE TABLE postgres_table (a Integer NOT NULL, b Integer, c Integer NOT NULL, d Integer, e Integer NOT NULL); @@ -171,7 +171,7 @@ Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.htm Possible values: - - Positive integer. + - Positive integer. Default value: `65536`. diff --git a/docs/en/engines/database-engines/mysql.md b/docs/en/engines/database-engines/mysql.md index 7c8c3459ec5..20434ad124e 100644 --- a/docs/en/engines/database-engines/mysql.md +++ b/docs/en/engines/database-engines/mysql.md @@ -12,9 +12,9 @@ The `MySQL` database engine translate queries to the MySQL server so you can per You cannot perform the following queries: -- `RENAME` -- `CREATE TABLE` -- `ALTER` +- `RENAME` +- `CREATE TABLE` +- `ALTER` ## Creating a Database {#creating-a-database} @@ -25,10 +25,10 @@ ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') **Engine Parameters** -- `host:port` — MySQL server address. -- `database` — Remote database name. -- `user` — MySQL user. -- `password` — User password. +- `host:port` — MySQL server address. +- `database` — Remote database name. +- `user` — MySQL user. +- `password` — User password. ## Data Types Support {#data_types-support} diff --git a/docs/en/engines/database-engines/postgresql.md b/docs/en/engines/database-engines/postgresql.md index 939995a61c5..294d1202bdd 100644 --- a/docs/en/engines/database-engines/postgresql.md +++ b/docs/en/engines/database-engines/postgresql.md @@ -21,12 +21,12 @@ ENGINE = PostgreSQL('host:port', 'database', 'user', 'password'[, `schema`, `use **Engine Parameters** -- `host:port` — PostgreSQL server address. -- `database` — Remote database name. -- `user` — PostgreSQL user. -- `password` — User password. -- `schema` — PostgreSQL schema. -- `use_table_cache` — Defines if the database table structure is cached or not. Optional. Default value: `0`. +- `host:port` — PostgreSQL server address. +- `database` — Remote database name. +- `user` — PostgreSQL user. +- `password` — User password. +- `schema` — PostgreSQL schema. +- `use_table_cache` — Defines if the database table structure is cached or not. Optional. Default value: `0`. ## Data Types Support {#data_types-support} @@ -140,3 +140,4 @@ DESCRIBE TABLE test_database.test_table; ## Related content - Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres) +- Blog: [ClickHouse and PostgreSQL - a Match Made in Data Heaven - part 2](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres-part-2) diff --git a/docs/en/engines/database-engines/replicated.md b/docs/en/engines/database-engines/replicated.md index 43d1ce5ec3f..5672633c4a2 100644 --- a/docs/en/engines/database-engines/replicated.md +++ b/docs/en/engines/database-engines/replicated.md @@ -17,9 +17,9 @@ CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_na **Engine Parameters** -- `zoo_path` — ZooKeeper path. The same ZooKeeper path corresponds to the same database. -- `shard_name` — Shard name. Database replicas are grouped into shards by `shard_name`. -- `replica_name` — Replica name. Replica names must be different for all replicas of the same shard. +- `zoo_path` — ZooKeeper path. 
The same ZooKeeper path corresponds to the same database. +- `shard_name` — Shard name. Database replicas are grouped into shards by `shard_name`. +- `replica_name` — Replica name. Replica names must be different for all replicas of the same shard. For [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) tables if no arguments provided, then default arguments are used: `/clickhouse/tables/{uuid}/{shard}` and `{replica}`. These can be changed in the server settings [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) and [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Macro `{uuid}` is unfolded to table's uuid, `{shard}` and `{replica}` are unfolded to values from server config, not from database engine arguments. But in the future, it will be possible to use `shard_name` and `replica_name` of Replicated database. diff --git a/docs/en/engines/database-engines/sqlite.md b/docs/en/engines/database-engines/sqlite.md index eef0bb84088..fc2a6525a68 100644 --- a/docs/en/engines/database-engines/sqlite.md +++ b/docs/en/engines/database-engines/sqlite.md @@ -17,7 +17,7 @@ Allows to connect to [SQLite](https://www.sqlite.org/index.html) database and pe **Engine Parameters** -- `db_path` — Path to a file with SQLite database. +- `db_path` — Path to a file with SQLite database. ## Data Types Support {#data_types-support} diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md index 31563e2e727..d7c582164de 100644 --- a/docs/en/engines/table-engines/index.md +++ b/docs/en/engines/table-engines/index.md @@ -9,12 +9,12 @@ toc_title: Introduction The table engine (type of table) determines: -- How and where data is stored, where to write it to, and where to read it from. -- Which queries are supported, and how. -- Concurrent data access. -- Use of indexes, if present. -- Whether multithread request execution is possible. -- Data replication parameters. +- How and where data is stored, where to write it to, and where to read it from. +- Which queries are supported, and how. +- Concurrent data access. +- Use of indexes, if present. +- Whether multithread request execution is possible. +- Data replication parameters. ## Engine Families {#engine-families} @@ -24,13 +24,13 @@ The most universal and functional table engines for high-load tasks. 
The propert Engines in the family: -- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#mergetree) -- [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md#replacingmergetree) -- [SummingMergeTree](../../engines/table-engines/mergetree-family/summingmergetree.md#summingmergetree) -- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md#aggregatingmergetree) -- [CollapsingMergeTree](../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) -- [VersionedCollapsingMergeTree](../../engines/table-engines/mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree) -- [GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) +- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#mergetree) +- [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md#replacingmergetree) +- [SummingMergeTree](../../engines/table-engines/mergetree-family/summingmergetree.md#summingmergetree) +- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md#aggregatingmergetree) +- [CollapsingMergeTree](../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) +- [VersionedCollapsingMergeTree](../../engines/table-engines/mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree) +- [GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) ### Log {#log} @@ -38,9 +38,9 @@ Lightweight [engines](../../engines/table-engines/log-family/index.md) with mini Engines in the family: -- [TinyLog](../../engines/table-engines/log-family/tinylog.md#tinylog) -- [StripeLog](../../engines/table-engines/log-family/stripelog.md#stripelog) -- [Log](../../engines/table-engines/log-family/log.md#log) +- [TinyLog](../../engines/table-engines/log-family/tinylog.md#tinylog) +- [StripeLog](../../engines/table-engines/log-family/stripelog.md#stripelog) +- [Log](../../engines/table-engines/log-family/log.md#log) ### Integration Engines {#integration-engines} @@ -49,34 +49,34 @@ Engines for communicating with other data storage and processing systems. 
Engines in the family: -- [ODBC](../../engines/table-engines/integrations/odbc.md) -- [JDBC](../../engines/table-engines/integrations/jdbc.md) -- [MySQL](../../engines/table-engines/integrations/mysql.md) -- [MongoDB](../../engines/table-engines/integrations/mongodb.md) -- [HDFS](../../engines/table-engines/integrations/hdfs.md) -- [S3](../../engines/table-engines/integrations/s3.md) -- [Kafka](../../engines/table-engines/integrations/kafka.md) -- [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) -- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md) -- [PostgreSQL](../../engines/table-engines/integrations/postgresql.md) +- [ODBC](../../engines/table-engines/integrations/odbc.md) +- [JDBC](../../engines/table-engines/integrations/jdbc.md) +- [MySQL](../../engines/table-engines/integrations/mysql.md) +- [MongoDB](../../engines/table-engines/integrations/mongodb.md) +- [HDFS](../../engines/table-engines/integrations/hdfs.md) +- [S3](../../engines/table-engines/integrations/s3.md) +- [Kafka](../../engines/table-engines/integrations/kafka.md) +- [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) +- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md) +- [PostgreSQL](../../engines/table-engines/integrations/postgresql.md) ### Special Engines {#special-engines} Engines in the family: -- [Distributed](../../engines/table-engines/special/distributed.md#distributed) -- [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview) -- [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary) -- [Merge](../../engines/table-engines/special/merge.md#merge) -- [File](../../engines/table-engines/special/file.md#file) -- [Null](../../engines/table-engines/special/null.md#null) -- [Set](../../engines/table-engines/special/set.md#set) -- [Join](../../engines/table-engines/special/join.md#join) -- [URL](../../engines/table-engines/special/url.md#table_engines-url) -- [View](../../engines/table-engines/special/view.md#table_engines-view) -- [Memory](../../engines/table-engines/special/memory.md#memory) -- [Buffer](../../engines/table-engines/special/buffer.md#buffer) -- [KeeperMap](../../engines/table-engines/special/keepermap.md) +- [Distributed](../../engines/table-engines/special/distributed.md#distributed) +- [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview) +- [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary) +- [Merge](../../engines/table-engines/special/merge.md#merge) +- [File](../../engines/table-engines/special/file.md#file) +- [Null](../../engines/table-engines/special/null.md#null) +- [Set](../../engines/table-engines/special/set.md#set) +- [Join](../../engines/table-engines/special/join.md#join) +- [URL](../../engines/table-engines/special/url.md#table_engines-url) +- [View](../../engines/table-engines/special/view.md#table_engines-view) +- [Memory](../../engines/table-engines/special/memory.md#memory) +- [Buffer](../../engines/table-engines/special/buffer.md#buffer) +- [KeeperMap](../../engines/table-engines/special/keepermap.md) ## Virtual Columns {#table_engines-virtual_columns} diff --git a/docs/en/engines/table-engines/integrations/ExternalDistributed.md b/docs/en/engines/table-engines/integrations/ExternalDistributed.md index 4e0f66ebd4f..3fb3fe88b55 100644 --- a/docs/en/engines/table-engines/integrations/ExternalDistributed.md +++ 
b/docs/en/engines/table-engines/integrations/ExternalDistributed.md @@ -22,17 +22,17 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen The table structure can differ from the original table structure: -- Column names should be the same as in the original table, but you can use just some of these columns and in any order. -- Column types may differ from those in the original table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types. +- Column names should be the same as in the original table, but you can use just some of these columns and in any order. +- Column types may differ from those in the original table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types. **Engine Parameters** -- `engine` — The table engine `MySQL` or `PostgreSQL`. -- `host:port` — MySQL or PostgreSQL server address. -- `database` — Remote database name. -- `table` — Remote table name. -- `user` — User name. -- `password` — User password. +- `engine` — The table engine `MySQL` or `PostgreSQL`. +- `host:port` — MySQL or PostgreSQL server address. +- `database` — Remote database name. +- `table` — Remote table name. +- `user` — User name. +- `password` — User password. ## Implementation Details {#implementation-details} @@ -48,6 +48,6 @@ You can specify any number of shards and any number of replicas for each shard. **See Also** -- [MySQL table engine](../../../engines/table-engines/integrations/mysql.md) -- [PostgreSQL table engine](../../../engines/table-engines/integrations/postgresql.md) -- [Distributed table engine](../../../engines/table-engines/special/distributed.md) +- [MySQL table engine](../../../engines/table-engines/integrations/mysql.md) +- [PostgreSQL table engine](../../../engines/table-engines/integrations/postgresql.md) +- [Distributed table engine](../../../engines/table-engines/special/distributed.md) diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index 99183ac7308..3e2e177e28f 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -18,8 +18,8 @@ CREATE TABLE deltalake **Engine parameters** -- `url` — Bucket url with path to the existing Delta Lake table. -- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. +- `url` — Bucket url with path to the existing Delta Lake table. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. 
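To make these parameters concrete, here is a minimal sketch of creating and querying a Delta Lake-backed table; the bucket URL and credentials are hypothetical, and the sketch assumes the column structure is inferred from the Delta Lake metadata (as in the documented example, no column list is given):

``` sql
-- Hypothetical bucket URL and credentials; replace with your own.
CREATE TABLE deltalake_example
ENGINE = DeltaLake('https://my-bucket.s3.amazonaws.com/delta_table/', 'ABC123', 'Abc+123');

-- The table reads the current state of the Delta Lake table.
SELECT * FROM deltalake_example LIMIT 10;
```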
Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) @@ -49,4 +49,4 @@ CREATE TABLE deltalake ENGINE=DeltaLake(deltalake_conf, filename = 'test_table') ## See also -- [deltaLake table function](../../../sql-reference/table-functions/deltalake.md) +- [deltaLake table function](../../../sql-reference/table-functions/deltalake.md) diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 1497ea47eca..b9db0fae68f 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -17,7 +17,7 @@ ENGINE = HDFS(URI, format) **Engine Parameters** - `URI` - whole file URI in HDFS. The path part of `URI` may contain globs. In this case the table would be readonly. -- `format` - specifies one of the available file formats. To perform +- `format` - specifies one of the available file formats. To perform `SELECT` queries, the format must be supported for input, and to perform `INSERT` queries – for output. The available formats are listed in the [Formats](../../../interfaces/formats.md#formats) section. @@ -58,11 +58,11 @@ SELECT * FROM hdfs_engine_table LIMIT 2 ## Implementation Details {#implementation-details} -- Reads and writes can be parallel. -- Not supported: - - `ALTER` and `SELECT...SAMPLE` operations. - - Indexes. - - [Zero-copy](../../../operations/storing-data.md#zero-copy) replication is possible, but not recommended. +- Reads and writes can be parallel. +- Not supported: + - `ALTER` and `SELECT...SAMPLE` operations. + - Indexes. + - [Zero-copy](../../../operations/storing-data.md#zero-copy) replication is possible, but not recommended. :::note Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. @@ -72,10 +72,10 @@ SELECT * FROM hdfs_engine_table LIMIT 2 Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern. Listing of files determines during `SELECT` (not at `CREATE` moment). -- `*` — Substitutes any number of any characters except `/` including empty string. -- `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. -- `{N..M}` — Substitutes any number in range from N to M including both borders. +- `*` — Substitutes any number of any characters except `/` including empty string. +- `?` — Substitutes any single character. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{N..M}` — Substitutes any number in range from N to M including both borders. Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. @@ -83,12 +83,12 @@ Constructions with `{}` are similar to the [remote](../../../sql-reference/table 1. 
Suppose we have several files in TSV format with the following URIs on HDFS: - - 'hdfs://hdfs1:9000/some_dir/some_file_1' - - 'hdfs://hdfs1:9000/some_dir/some_file_2' - - 'hdfs://hdfs1:9000/some_dir/some_file_3' - - 'hdfs://hdfs1:9000/another_dir/some_file_1' - - 'hdfs://hdfs1:9000/another_dir/some_file_2' - - 'hdfs://hdfs1:9000/another_dir/some_file_3' + - 'hdfs://hdfs1:9000/some_dir/some_file_1' + - 'hdfs://hdfs1:9000/some_dir/some_file_2' + - 'hdfs://hdfs1:9000/some_dir/some_file_3' + - 'hdfs://hdfs1:9000/another_dir/some_file_1' + - 'hdfs://hdfs1:9000/another_dir/some_file_2' + - 'hdfs://hdfs1:9000/another_dir/some_file_3' 1. There are several ways to make a table consisting of all six files: @@ -145,7 +145,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us | **parameter** | **default value** | -| - | - | +| - | - | | rpc\_client\_connect\_tcpnodelay | true | | dfs\_client\_read\_shortcircuit | true | | output\_replace-datanode-on-failure | true | @@ -195,7 +195,7 @@ Similar to GraphiteMergeTree, the HDFS engine supports extended configuration us #### ClickHouse extras {#clickhouse-extras} | **parameter** | **default value** | -| - | - | +| - | - | |hadoop\_kerberos\_keytab | "" | |hadoop\_kerberos\_principal | "" | |libhdfs3\_conf | "" | @@ -230,9 +230,9 @@ libhdfs3 support HDFS namenode HA. ## Virtual Columns {#virtual-columns} -- `_path` — Path to the file. -- `_file` — Name of the file. +- `_path` — Path to the file. +- `_file` — Name of the file. **See Also** -- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) +- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) diff --git a/docs/en/engines/table-engines/integrations/hive.md b/docs/en/engines/table-engines/integrations/hive.md index fd16e717c89..adcb73605bb 100644 --- a/docs/en/engines/table-engines/integrations/hive.md +++ b/docs/en/engines/table-engines/integrations/hive.md @@ -28,17 +28,17 @@ PARTITION BY expr See a detailed description of the [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) query. The table structure can differ from the original Hive table structure: -- Column names should be the same as in the original Hive table, but you can use just some of these columns and in any order, also you can use some alias columns calculated from other columns. -- Column types should be the same from those in the original Hive table. -- Partition by expression should be consistent with the original Hive table, and columns in partition by expression should be in the table structure. +- Column names should be the same as in the original Hive table, but you can use just some of these columns and in any order, also you can use some alias columns calculated from other columns. +- Column types should be the same from those in the original Hive table. +- Partition by expression should be consistent with the original Hive table, and columns in partition by expression should be in the table structure. **Engine Parameters** -- `thrift://host:port` — Hive Metastore address +- `thrift://host:port` — Hive Metastore address -- `database` — Remote database name. +- `database` — Remote database name. -- `table` — Remote table name. +- `table` — Remote table name. 
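Before the full usage example below, a minimal sketch of the `CREATE TABLE` shape these parameters imply; the Metastore endpoint, database, table, and columns are hypothetical, and the column list and `PARTITION BY` expression must stay consistent with the original Hive table:

``` sql
-- Hypothetical Hive Metastore endpoint and ORC-backed table partitioned by `day`.
CREATE TABLE hive_example
(
    `day` String,
    `name` String,
    `value` UInt32
)
ENGINE = Hive('thrift://hive-metastore:9083', 'default', 'example_table')
PARTITION BY day;
```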
## Usage Example {#usage-example} diff --git a/docs/en/engines/table-engines/integrations/hudi.md b/docs/en/engines/table-engines/integrations/hudi.md index a14134ecdfa..a11e915aa3d 100644 --- a/docs/en/engines/table-engines/integrations/hudi.md +++ b/docs/en/engines/table-engines/integrations/hudi.md @@ -18,8 +18,8 @@ CREATE TABLE hudi_table **Engine parameters** -- `url` — Bucket url with the path to an existing Hudi table. -- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. +- `url` — Bucket url with the path to an existing Hudi table. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) @@ -49,4 +49,4 @@ CREATE TABLE hudi_table ENGINE=Hudi(hudi_conf, filename = 'test_table') ## See also -- [hudi table function](/docs/en/sql-reference/table-functions/hudi.md) +- [hudi table function](/docs/en/sql-reference/table-functions/hudi.md) diff --git a/docs/en/engines/table-engines/integrations/iceberg.md b/docs/en/engines/table-engines/integrations/iceberg.md index 4322fc6b773..77cefc9283d 100644 --- a/docs/en/engines/table-engines/integrations/iceberg.md +++ b/docs/en/engines/table-engines/integrations/iceberg.md @@ -18,8 +18,8 @@ CREATE TABLE iceberg_table **Engine parameters** -- `url` — url with the path to an existing Iceberg table. -- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. +- `url` — url with the path to an existing Iceberg table. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. 
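As with Delta Lake above, a hedged sketch with a hypothetical bucket URL and credentials; the table structure comes from the Iceberg metadata, and the resulting table is read-only:

``` sql
-- Hypothetical URL and credentials; replace with your own.
CREATE TABLE iceberg_example
ENGINE = Iceberg('http://my-bucket.s3.amazonaws.com/iceberg_table', 'ABC123', 'Abc+123');

SELECT count() FROM iceberg_example;
```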
Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md) @@ -49,4 +49,4 @@ CREATE TABLE iceberg_table ENGINE=Iceberg(iceberg_conf, filename = 'test_table') ## See also -- [iceberg table function](/docs/en/sql-reference/table-functions/iceberg.md) +- [iceberg table function](/docs/en/sql-reference/table-functions/iceberg.md) diff --git a/docs/en/engines/table-engines/integrations/index.md b/docs/en/engines/table-engines/integrations/index.md index 7a8b537aea8..b321a644d32 100644 --- a/docs/en/engines/table-engines/integrations/index.md +++ b/docs/en/engines/table-engines/integrations/index.md @@ -10,20 +10,20 @@ ClickHouse provides various means for integrating with external systems, includi List of supported integrations: -- [ODBC](../../../engines/table-engines/integrations/odbc.md) -- [JDBC](../../../engines/table-engines/integrations/jdbc.md) -- [MySQL](../../../engines/table-engines/integrations/mysql.md) -- [MongoDB](../../../engines/table-engines/integrations/mongodb.md) -- [HDFS](../../../engines/table-engines/integrations/hdfs.md) -- [S3](../../../engines/table-engines/integrations/s3.md) -- [Kafka](../../../engines/table-engines/integrations/kafka.md) -- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md) -- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) -- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md) -- [SQLite](../../../engines/table-engines/integrations/sqlite.md) -- [Hive](../../../engines/table-engines/integrations/hive.md) -- [ExternalDistributed](../../../engines/table-engines/integrations/ExternalDistributed.md) -- [MaterializedPostgreSQL](../../../engines/table-engines/integrations/materialized-postgresql.md) -- [NATS](../../../engines/table-engines/integrations/nats.md) -- [DeltaLake](../../../engines/table-engines/integrations/deltalake.md) -- [Hudi](../../../engines/table-engines/integrations/hudi.md) +- [ODBC](../../../engines/table-engines/integrations/odbc.md) +- [JDBC](../../../engines/table-engines/integrations/jdbc.md) +- [MySQL](../../../engines/table-engines/integrations/mysql.md) +- [MongoDB](../../../engines/table-engines/integrations/mongodb.md) +- [HDFS](../../../engines/table-engines/integrations/hdfs.md) +- [S3](../../../engines/table-engines/integrations/s3.md) +- [Kafka](../../../engines/table-engines/integrations/kafka.md) +- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md) +- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) +- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md) +- [SQLite](../../../engines/table-engines/integrations/sqlite.md) +- [Hive](../../../engines/table-engines/integrations/hive.md) +- [ExternalDistributed](../../../engines/table-engines/integrations/ExternalDistributed.md) +- [MaterializedPostgreSQL](../../../engines/table-engines/integrations/materialized-postgresql.md) +- [NATS](../../../engines/table-engines/integrations/nats.md) +- [DeltaLake](../../../engines/table-engines/integrations/deltalake.md) +- [Hudi](../../../engines/table-engines/integrations/hudi.md) diff --git a/docs/en/engines/table-engines/integrations/jdbc.md b/docs/en/engines/table-engines/integrations/jdbc.md index 6853b5d1df7..99f851dcf3e 100644 --- a/docs/en/engines/table-engines/integrations/jdbc.md +++ b/docs/en/engines/table-engines/integrations/jdbc.md @@ -25,14 +25,14 @@ ENGINE = JDBC(datasource_uri, external_database, external_table) **Engine 
Parameters** -- `datasource_uri` — URI or name of an external DBMS. +- `datasource_uri` — URI or name of an external DBMS. URI Format: `jdbc:://:/?user=&password=`. Example for MySQL: `jdbc:mysql://localhost:3306/?user=root&password=root`. -- `external_database` — Database in an external DBMS. +- `external_database` — Database in an external DBMS. -- `external_table` — Name of the table in `external_database` or a select query like `select * from table1 where column1=1`. +- `external_table` — Name of the table in `external_database` or a select query like `select * from table1 where column1=1`. ## Usage Example {#usage-example} @@ -91,4 +91,4 @@ FROM system.numbers ## See Also {#see-also} -- [JDBC table function](../../../sql-reference/table-functions/jdbc.md). +- [JDBC table function](../../../sql-reference/table-functions/jdbc.md). diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md index 0b1717978b7..ab69e4e90ce 100644 --- a/docs/en/engines/table-engines/integrations/kafka.md +++ b/docs/en/engines/table-engines/integrations/kafka.md @@ -10,9 +10,9 @@ This engine works with [Apache Kafka](http://kafka.apache.org/). Kafka lets you: -- Publish or subscribe to data flows. -- Organize fault-tolerant storage. -- Process streams as they become available. +- Publish or subscribe to data flows. +- Organize fault-tolerant storage. +- Process streams as they become available. ## Creating a Table {#table_engine-kafka-creating-a-table} @@ -46,27 +46,27 @@ SETTINGS Required parameters: -- `kafka_broker_list` — A comma-separated list of brokers (for example, `localhost:9092`). -- `kafka_topic_list` — A list of Kafka topics. -- `kafka_group_name` — A group of Kafka consumers. Reading margins are tracked for each group separately. If you do not want messages to be duplicated in the cluster, use the same group name everywhere. -- `kafka_format` — Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. +- `kafka_broker_list` — A comma-separated list of brokers (for example, `localhost:9092`). +- `kafka_topic_list` — A list of Kafka topics. +- `kafka_group_name` — A group of Kafka consumers. Reading margins are tracked for each group separately. If you do not want messages to be duplicated in the cluster, use the same group name everywhere. +- `kafka_format` — Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. Optional parameters: -- `kafka_row_delimiter` — Delimiter character, which ends the message. **This setting is deprecated and is no longer used, not left for compatibility reasons.** -- `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. -- `kafka_num_consumers` — The number of consumers per table. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition, and must not be greater than the number of physical cores on the server where ClickHouse is deployed. Default: `1`. -- `kafka_max_block_size` — The maximum batch size (in messages) for poll. 
Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). -- `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). Default: `0`. -- `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block. Default: `0`. -- `kafka_client_id` — Client identifier. Empty by default. -- `kafka_poll_timeout_ms` — Timeout for single poll from Kafka. Default: [stream_poll_timeout_ms](../../../operations/settings/settings.md#stream_poll_timeout_ms). -- `kafka_poll_max_batch_size` — Maximum amount of messages to be polled in a single Kafka poll. Default: [max_block_size](../../../operations/settings/settings.md#setting-max_block_size). -- `kafka_flush_interval_ms` — Timeout for flushing data from Kafka. Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms). -- `kafka_thread_per_consumer` — Provide independent thread for each consumer. When enabled, every consumer flush the data independently, in parallel (otherwise — rows from several consumers squashed to form one block). Default: `0`. -- `kafka_handle_error_mode` — How to handle errors for Kafka engine. Possible values: default, stream. -- `kafka_commit_on_select` — Commit messages when select query is made. Default: `false`. -- `kafka_max_rows_per_message` — The maximum number of rows written in one kafka message for row-based formats. Default : `1`. +- `kafka_row_delimiter` — Delimiter character, which ends the message. **This setting is deprecated and is no longer used, not left for compatibility reasons.** +- `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. +- `kafka_num_consumers` — The number of consumers per table. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition, and must not be greater than the number of physical cores on the server where ClickHouse is deployed. Default: `1`. +- `kafka_max_block_size` — The maximum batch size (in messages) for poll. Default: [max_insert_block_size](../../../operations/settings/settings.md#setting-max_insert_block_size). +- `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data). Default: `0`. +- `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block. Default: `0`. +- `kafka_client_id` — Client identifier. Empty by default. +- `kafka_poll_timeout_ms` — Timeout for single poll from Kafka. Default: [stream_poll_timeout_ms](../../../operations/settings/settings.md#stream_poll_timeout_ms). +- `kafka_poll_max_batch_size` — Maximum amount of messages to be polled in a single Kafka poll. Default: [max_block_size](../../../operations/settings/settings.md#setting-max_block_size). +- `kafka_flush_interval_ms` — Timeout for flushing data from Kafka. 
Default: [stream_flush_interval_ms](../../../operations/settings/settings.md#stream-flush-interval-ms). +- `kafka_thread_per_consumer` — Provide independent thread for each consumer. When enabled, every consumer flush the data independently, in parallel (otherwise — rows from several consumers squashed to form one block). Default: `0`. +- `kafka_handle_error_mode` — How to handle errors for Kafka engine. Possible values: default, stream. +- `kafka_commit_on_select` — Commit messages when select query is made. Default: `false`. +- `kafka_max_rows_per_message` — The maximum number of rows written in one kafka message for row-based formats. Default : `1`. Examples: @@ -239,14 +239,14 @@ Example: ## Virtual Columns {#virtual-columns} -- `_topic` — Kafka topic. -- `_key` — Key of the message. -- `_offset` — Offset of the message. -- `_timestamp` — Timestamp of the message. -- `_timestamp_ms` — Timestamp in milliseconds of the message. -- `_partition` — Partition of Kafka topic. -- `_headers.name` — Array of message's headers keys. -- `_headers.value` — Array of message's headers values. +- `_topic` — Kafka topic. +- `_key` — Key of the message. +- `_offset` — Offset of the message. +- `_timestamp` — Timestamp of the message. +- `_timestamp_ms` — Timestamp in milliseconds of the message. +- `_partition` — Partition of Kafka topic. +- `_headers.name` — Array of message's headers keys. +- `_headers.value` — Array of message's headers values. ## Data formats support {#data-formats-support} @@ -258,5 +258,5 @@ The number of rows in one Kafka message depends on whether the format is row-bas **See Also** -- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) -- [background_message_broker_schedule_pool_size](../../../operations/server-configuration-parameters/settings.md#background_message_broker_schedule_pool_size) +- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) +- [background_message_broker_schedule_pool_size](../../../operations/server-configuration-parameters/settings.md#background_message_broker_schedule_pool_size) diff --git a/docs/en/engines/table-engines/integrations/materialized-postgresql.md b/docs/en/engines/table-engines/integrations/materialized-postgresql.md index 3920b402a49..e112ca3bbb1 100644 --- a/docs/en/engines/table-engines/integrations/materialized-postgresql.md +++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md @@ -19,11 +19,11 @@ PRIMARY KEY key; **Engine Parameters** -- `host:port` — PostgreSQL server address. -- `database` — Remote database name. -- `table` — Remote table name. -- `user` — PostgreSQL user. -- `password` — User password. +- `host:port` — PostgreSQL server address. +- `database` — Remote database name. +- `table` — Remote table name. +- `user` — PostgreSQL user. +- `password` — User password. ## Requirements {#requirements} @@ -33,11 +33,13 @@ PRIMARY KEY key; 3. Only database [Atomic](https://en.wikipedia.org/wiki/Atomicity_(database_systems)) is allowed. +4. The `MaterializedPostgreSQL` table engine only works for PostgreSQL versions >= 11 as the implementation requires the [pg_replication_slot_advance](https://pgpedia.info/p/pg_replication_slot_advance.html) PostgreSQL function. + ## Virtual columns {#virtual-columns} -- `_version` — Transaction counter. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `_version` — Transaction counter. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `_sign` — Deletion mark. 
Type: [Int8](../../../sql-reference/data-types/int-uint.md). Possible values: +- `_sign` — Deletion mark. Type: [Int8](../../../sql-reference/data-types/int-uint.md). Possible values: - `1` — Row is not deleted, - `-1` — Row is deleted. diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md index be45ce88c67..a647ac9993f 100644 --- a/docs/en/engines/table-engines/integrations/mongodb.md +++ b/docs/en/engines/table-engines/integrations/mongodb.md @@ -21,17 +21,17 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name **Engine Parameters** -- `host:port` — MongoDB server address. +- `host:port` — MongoDB server address. -- `database` — Remote database name. +- `database` — Remote database name. -- `collection` — Remote collection name. +- `collection` — Remote collection name. -- `user` — MongoDB user. +- `user` — MongoDB user. -- `password` — User password. +- `password` — User password. -- `options` — MongoDB connection string options (optional parameter). +- `options` — MongoDB connection string options (optional parameter). ## Usage Example {#usage-example} diff --git a/docs/en/engines/table-engines/integrations/mysql.md b/docs/en/engines/table-engines/integrations/mysql.md index 4b285ee80a5..6ff6221c877 100644 --- a/docs/en/engines/table-engines/integrations/mysql.md +++ b/docs/en/engines/table-engines/integrations/mysql.md @@ -31,25 +31,25 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen The table structure can differ from the original MySQL table structure: -- Column names should be the same as in the original MySQL table, but you can use just some of these columns and in any order. -- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../engines/database-engines/mysql.md#data_types-support) values to the ClickHouse data types. -- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. +- Column names should be the same as in the original MySQL table, but you can use just some of these columns and in any order. +- Column types may differ from those in the original MySQL table. ClickHouse tries to [cast](../../../engines/database-engines/mysql.md#data_types-support) values to the ClickHouse data types. +- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. **Engine Parameters** -- `host:port` — MySQL server address. +- `host:port` — MySQL server address. -- `database` — Remote database name. +- `database` — Remote database name. -- `table` — Remote table name. +- `table` — Remote table name. -- `user` — MySQL user. +- `user` — MySQL user. -- `password` — User password. +- `password` — User password. -- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. If `replace_query=1`, the query is substituted. +- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. If `replace_query=1`, the query is substituted. 
-- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query. +- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query. Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1`. See the [MySQL documentation](https://dev.mysql.com/doc/refman/8.0/en/insert-on-duplicate.html) to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause. @@ -121,8 +121,8 @@ Allows to automatically close the connection after query execution, i.e. disable Possible values: -- 1 — Auto-close connection is allowed, so the connection reuse is disabled -- 0 — Auto-close connection is not allowed, so the connection reuse is enabled +- 1 — Auto-close connection is allowed, so the connection reuse is disabled +- 0 — Auto-close connection is not allowed, so the connection reuse is enabled Default value: `1`. @@ -132,8 +132,8 @@ Sets the number of retries for pool with failover. Possible values: -- Positive integer. -- 0 — There are no retries for pool with failover. +- Positive integer. +- 0 — There are no retries for pool with failover. Default value: `3`. @@ -143,7 +143,7 @@ Size of connection pool (if all connections are in use, the query will wait unti Possible values: -- Positive integer. +- Positive integer. Default value: `16`. @@ -153,7 +153,7 @@ Timeout (in seconds) for waiting for free connection (in case of there is alread Possible values: -- Positive integer. +- Positive integer. Default value: `5`. @@ -163,7 +163,7 @@ Connect timeout (in seconds). Possible values: -- Positive integer. +- Positive integer. Default value: `10`. @@ -173,11 +173,11 @@ Read/write timeout (in seconds). Possible values: -- Positive integer. +- Positive integer. Default value: `300`. ## See Also {#see-also} -- [The mysql table function](../../../sql-reference/table-functions/mysql.md) -- [Using MySQL as a dictionary source](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-mysql) +- [The mysql table function](../../../sql-reference/table-functions/mysql.md) +- [Using MySQL as a dictionary source](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-mysql) diff --git a/docs/en/engines/table-engines/integrations/nats.md b/docs/en/engines/table-engines/integrations/nats.md index f2856c89238..7f09c516d6f 100644 --- a/docs/en/engines/table-engines/integrations/nats.md +++ b/docs/en/engines/table-engines/integrations/nats.md @@ -45,9 +45,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Required parameters: -- `nats_url` – host:port (for example, `localhost:5672`).. -- `nats_subjects` – List of subject for NATS table to subscribe/publsh to. Supports wildcard subjects like `foo.*.bar` or `baz.>` -- `nats_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. +- `nats_url` – host:port (for example, `localhost:5672`).. +- `nats_subjects` – List of subject for NATS table to subscribe/publsh to. Supports wildcard subjects like `foo.*.bar` or `baz.>` +- `nats_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section. 
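A minimal sketch using only these required parameters (the subject name and table layout are hypothetical; 4222 is NATS's default client port):

``` sql
CREATE TABLE nats_queue (key UInt64, value UInt64)
ENGINE = NATS
SETTINGS nats_url = 'localhost:4222',
         nats_subjects = 'subject1',
         nats_format = 'JSONEachRow';
```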
Optional parameters: diff --git a/docs/en/engines/table-engines/integrations/odbc.md b/docs/en/engines/table-engines/integrations/odbc.md index 8aac9dc3af0..37e08dc1420 100644 --- a/docs/en/engines/table-engines/integrations/odbc.md +++ b/docs/en/engines/table-engines/integrations/odbc.md @@ -28,15 +28,15 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen The table structure can differ from the source table structure: -- Column names should be the same as in the source table, but you can use just some of these columns and in any order. -- Column types may differ from those in the source table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types. -- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. +- Column names should be the same as in the source table, but you can use just some of these columns and in any order. +- Column types may differ from those in the source table. ClickHouse tries to [cast](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) values to the ClickHouse data types. +- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. **Engine Parameters** -- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file. -- `external_database` — Name of a database in an external DBMS. -- `external_table` — Name of a table in the `external_database`. +- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file. +- `external_database` — Name of a database in an external DBMS. +- `external_table` — Name of a table in the `external_database`. ## Usage Example {#usage-example} @@ -126,5 +126,5 @@ SELECT * FROM odbc_t ## See Also {#see-also} -- [ODBC dictionaries](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-odbc) -- [ODBC table function](../../../sql-reference/table-functions/odbc.md) +- [ODBC dictionaries](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-odbc) +- [ODBC table function](../../../sql-reference/table-functions/odbc.md) diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index 2222d1fc016..fbd6d944363 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -23,19 +23,19 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen The table structure can differ from the original PostgreSQL table structure: -- Column names should be the same as in the original PostgreSQL table, but you can use just some of these columns and in any order. -- Column types may differ from those in the original PostgreSQL table. 
ClickHouse tries to [cast](../../../engines/database-engines/postgresql.md#data_types-support) values to the ClickHouse data types. -- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. +- Column names should be the same as in the original PostgreSQL table, but you can use just some of these columns and in any order. +- Column types may differ from those in the original PostgreSQL table. ClickHouse tries to [cast](../../../engines/database-engines/postgresql.md#data_types-support) values to the ClickHouse data types. +- The [external_table_functions_use_nulls](../../../operations/settings/settings.md#external-table-functions-use-nulls) setting defines how to handle Nullable columns. Default value: 1. If 0, the table function does not make Nullable columns and inserts default values instead of nulls. This is also applicable for NULL values inside arrays. **Engine Parameters** -- `host:port` — PostgreSQL server address. -- `database` — Remote database name. -- `table` — Remote table name. -- `user` — PostgreSQL user. -- `password` — User password. -- `schema` — Non-default table schema. Optional. -- `on conflict ...` — example: `ON CONFLICT DO NOTHING`. Optional. Note: adding this option will make insertion less efficient. +- `host:port` — PostgreSQL server address. +- `database` — Remote database name. +- `table` — Remote table name. +- `user` — PostgreSQL user. +- `password` — User password. +- `schema` — Non-default table schema. Optional. +- `on conflict ...` — example: `ON CONFLICT DO NOTHING`. Optional. Note: adding this option will make insertion less efficient. 
or via config (since version 21.11):
@@ -111,7 +111,7 @@ In the example below replica `example01-1` has the highest priority:
## Usage Example {#usage-example}
-Table in PostgreSQL:
+### Table in PostgreSQL
``` text
postgres=# CREATE TABLE "public"."test" (
@@ -134,7 +134,9 @@ postgresql> SELECT * FROM test;
(1 row)
```
-Table in ClickHouse, retrieving data from the PostgreSQL table created above:
+### Creating a Table in ClickHouse and connecting it to the PostgreSQL table created above
+
+This example uses the [PostgreSQL table engine](/docs/en/engines/table-engines/integrations/postgresql.md) to connect the ClickHouse table to the PostgreSQL table:
``` sql
CREATE TABLE default.postgresql_table
(
@@ -146,6 +148,35 @@ CREATE TABLE default.postgresql_table
ENGINE = PostgreSQL('localhost:5432', 'public', 'test', 'postges_user', 'postgres_password');
```
+### Inserting initial data from the PostgreSQL table into the ClickHouse table, using a SELECT query
+
+The [postgresql table function](/docs/en/sql-reference/table-functions/postgresql.md) copies the data from PostgreSQL to ClickHouse. This is often done to improve query performance, by querying or performing analytics in ClickHouse rather than in PostgreSQL, and it can also be used for migrating data from PostgreSQL to ClickHouse:
+
+``` sql
+INSERT INTO default.postgresql_table
+SELECT * FROM postgresql('localhost:5432', 'public', 'test', 'postges_user', 'postgres_password');
+```
+
+### Inserting incremental data from the PostgreSQL table into the ClickHouse table
+
+If you then need ongoing synchronization between the PostgreSQL table and the ClickHouse table after the initial insert, you can use a WHERE clause to insert only the data added to PostgreSQL since the last sync, based on a timestamp or unique sequence ID.
+
+This would require keeping track of the max ID or timestamp previously added, such as the following:
+
+``` sql
+SELECT max(`int_id`) AS maxIntID FROM default.postgresql_table;
+```
+
+Then insert the values from the PostgreSQL table that are greater than the max (substituting the value retrieved above for `maxIntID`):
+
+``` sql
+INSERT INTO default.postgresql_table
+SELECT * FROM postgresql('localhost:5432', 'public', 'test', 'postges_user', 'postgres_password')
+WHERE int_id > maxIntID;
+```
+
+### Selecting data from the resulting ClickHouse table
+
``` sql
SELECT * FROM postgresql_table WHERE str IN ('test');
```
@@ -156,7 +187,7 @@ SELECT * FROM postgresql_table WHERE str IN ('test');
└────────────────┴──────┴────────┘
```
-Using Non-default Schema:
+### Using Non-default Schema
```text
postgres=# CREATE SCHEMA "nice.schema";
@@ -173,8 +204,10 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32)
**See Also**
-- [The `postgresql` table function](../../../sql-reference/table-functions/postgresql.md)
-- [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-postgresql)
+- [The `postgresql` table function](../../../sql-reference/table-functions/postgresql.md)
+- [Using PostgreSQL as a dictionary source](../../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-postgresql)
## Related content
+
- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres)
+- Blog: [ClickHouse and PostgreSQL - a Match Made in Data Heaven - part 2](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres-part-2)
diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md
index eec8691a165..08062278904 100644
--- a/docs/en/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/en/engines/table-engines/integrations/rabbitmq.md
@@ -10,8 +10,8 @@ This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.c
`RabbitMQ` lets you:
-- Publish or subscribe to data flows.
-- Process streams as they become available.
+- Publish or subscribe to data flows.
+- Process streams as they become available.
## Creating a Table {#table_engine-rabbitmq-creating-a-table}
@@ -51,9 +51,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Required parameters:
-- `rabbitmq_host_port` – host:port (for example, `localhost:5672`).
-- `rabbitmq_exchange_name` – RabbitMQ exchange name.
-- `rabbitmq_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
+- `rabbitmq_host_port` – host:port (for example, `localhost:5672`).
+- `rabbitmq_exchange_name` – RabbitMQ exchange name.
+- `rabbitmq_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
Optional parameters:
@@ -138,24 +138,24 @@ There can be no more than one exchange per table. One exchange can be shared bet
Exchange type options:
-- `direct` - Routing is based on the exact matching of keys. Example table key list: `key1,key2,key3,key4,key5`, message key can equal any of them.
-- `fanout` - Routing to all tables (where exchange name is the same) regardless of the keys.
-- `topic` - Routing is based on patterns with dot-separated keys.
Examples: `*.logs`, `records.*.*.2020`, `*.2018,*.2019,*.2020`.
-- `headers` - Routing is based on `key=value` matches with a setting `x-match=all` or `x-match=any`. Example table key list: `x-match=all,format=logs,type=report,year=2020`.
-- `consistent_hash` - Data is evenly distributed between all bound tables (where the exchange name is the same). Note that this exchange type must be enabled with RabbitMQ plugin: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`.
+- `direct` - Routing is based on the exact matching of keys. Example table key list: `key1,key2,key3,key4,key5`, message key can equal any of them.
+- `fanout` - Routing to all tables (where exchange name is the same) regardless of the keys.
+- `topic` - Routing is based on patterns with dot-separated keys. Examples: `*.logs`, `records.*.*.2020`, `*.2018,*.2019,*.2020`.
+- `headers` - Routing is based on `key=value` matches with a setting `x-match=all` or `x-match=any`. Example table key list: `x-match=all,format=logs,type=report,year=2020`.
+- `consistent_hash` - Data is evenly distributed between all bound tables (where the exchange name is the same). Note that this exchange type must be enabled with the RabbitMQ plugin: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`.

Setting `rabbitmq_queue_base` may be used for the following cases:

-- to let different tables share queues, so that multiple consumers could be registered for the same queues, which makes a better performance. If using `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings, the exact match of queues is achieved in case these parameters are the same.
-- to be able to restore reading from certain durable queues when not all messages were successfully consumed. To resume consumption from one specific queue - set its name in `rabbitmq_queue_base` setting and do not specify `rabbitmq_num_consumers` and `rabbitmq_num_queues` (defaults to 1). To resume consumption from all queues, which were declared for a specific table - just specify the same settings: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. By default, queue names will be unique to tables.
-- to reuse queues as they are declared durable and not auto-deleted. (Can be deleted via any of RabbitMQ CLI tools.)
+- to let different tables share queues, so that multiple consumers can be registered for the same queues, which improves performance. If the `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings are used, an exact match of queues is achieved when these parameters are the same.
+- to be able to restore reading from certain durable queues when not all messages were successfully consumed. To resume consumption from one specific queue, set its name in the `rabbitmq_queue_base` setting and do not specify `rabbitmq_num_consumers` and `rabbitmq_num_queues` (defaults to 1). To resume consumption from all queues that were declared for a specific table, just specify the same settings: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. By default, queue names will be unique to tables.
+- to reuse queues as they are declared durable and not auto-deleted. (They can be deleted via any of the RabbitMQ CLI tools.)

To improve performance, received messages are grouped into blocks the size of [max_insert_block_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size).
If the block wasn’t formed within [stream_flush_interval_ms](../../../operations/server-configuration-parameters/settings.md) milliseconds, the data will be flushed to the table regardless of the completeness of the block.

If `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings are specified along with `rabbitmq_exchange_type`, then:

-- `rabbitmq-consistent-hash-exchange` plugin must be enabled.
-- `message_id` property of the published messages must be specified (unique for each message/batch).
+- `rabbitmq-consistent-hash-exchange` plugin must be enabled.
+- `message_id` property of the published messages must be specified (unique for each message/batch).

For insert query there is message metadata, which is added for each published message: `messageID` and `republished` flag (true, if published more than once) - can be accessed via message headers.

diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md
index 6cbb5af82f6..cde09d79cd8 100644
--- a/docs/en/engines/table-engines/integrations/s3.md
+++ b/docs/en/engines/table-engines/integrations/s3.md
@@ -19,11 +19,11 @@ CREATE TABLE s3_engine_table (name String, value UInt32)

**Engine parameters**

-- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
-- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.
-- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
-- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
-- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.
+- `path` — Bucket URL with a path to the file. Supports the following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
+- `NOSIGN` - If this keyword is provided in place of credentials, none of the requests will be signed.
+- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
+- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are taken from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
+- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.

### PARTITION BY

@@ -51,18 +51,18 @@ SELECT * FROM s3_engine_table LIMIT 2;
```

## Virtual columns {#virtual-columns}

-- `_path` — Path to the file.
-- `_file` — Name of the file.
+- `_path` — Path to the file.
+- `_file` — Name of the file.
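+
+For example (a minimal sketch, assuming the `s3_engine_table` defined above), the virtual columns can be selected like ordinary columns:
+
+``` sql
+SELECT _path, _file, name, value
+FROM s3_engine_table
+LIMIT 2;
+```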
For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns). ## Implementation Details {#implementation-details} -- Reads and writes can be parallel -- Not supported: - - `ALTER` and `SELECT...SAMPLE` operations. - - Indexes. - - [Zero-copy](../../../operations/storing-data.md#zero-copy) replication is possible, but not supported. +- Reads and writes can be parallel +- Not supported: + - `ALTER` and `SELECT...SAMPLE` operations. + - Indexes. + - [Zero-copy](../../../operations/storing-data.md#zero-copy) replication is possible, but not supported. :::note Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. @@ -72,10 +72,10 @@ For more information about virtual columns see [here](../../../engines/table-eng `path` argument can specify multiple files using bash-like wildcards. For being processed file should exist and match to the whole path pattern. Listing of files is determined during `SELECT` (not at `CREATE` moment). -- `*` — Substitutes any number of any characters except `/` including empty string. -- `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. -- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`. +- `*` — Substitutes any number of any characters except `/` including empty string. +- `?` — Substitutes any single character. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`. Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function. 
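+
+For instance, a brace range can address several files at once. This is a sketch, assuming CSV files actually exist at the matching keys in the bucket:
+
+``` sql
+CREATE TABLE table_with_range (name String, value UInt32)
+    ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_{1..3}.csv', 'CSV');
+```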
@@ -96,12 +96,12 @@ CREATE TABLE big_table (name String, value UInt32) Suppose we have several files in CSV format with the following URIs on S3: -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_3.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_3.csv' There are several ways to make a table consisting of all six files: @@ -131,14 +131,14 @@ CREATE TABLE table_with_asterisk (name String, value UInt32) The following settings can be set before query execution or placed into configuration file. -- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`. -- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`. -- `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`. -- `s3_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. -- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). -- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. -- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). -- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. +- `s3_max_single_part_upload_size` — The maximum size of object to upload using singlepart upload to S3. Default value is `64Mb`. +- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`. +- `s3_max_redirects` — Max number of S3 redirects hops allowed. Default value is `10`. +- `s3_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. +- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). +- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. +- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. 
Default value is `0` (unlimited).
-- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
+- `s3_max_single_part_upload_size` — The maximum size of object to upload using single-part upload to S3. Default value is `64Mb`.
+- `s3_min_upload_part_size` — The minimum size of part to upload during multipart upload to [S3 Multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html). Default value is `512Mb`.
+- `s3_max_redirects` — Max number of S3 redirect hops allowed. Default value is `10`.
+- `s3_single_read_retries` — The maximum number of attempts during single read. Default value is `4`.
+- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
+- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
+- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
+- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.

Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in server configuration.

@@ -146,17 +146,17 @@ Security consideration: if malicious user can specify arbitrary S3 URLs, `s3_max

The following settings can be specified in configuration file for given endpoint (which will be matched by exact prefix of a URL):

-- `endpoint` — Specifies prefix of an endpoint. Mandatory.
-- `access_key_id` and `secret_access_key` — Specifies credentials to use with given endpoint. Optional.
-- `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and [Amazon EC2](https://en.wikipedia.org/wiki/Amazon_Elastic_Compute_Cloud) metadata for given endpoint. Optional, default value is `false`.
-- `region` — Specifies S3 region name. Optional.
-- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Optional, default value is `false`.
-- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
-- `no_sign_request` - Ignore all the credentials so requests are not signed. Useful for accessing public buckets.
-- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
-- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional.
-- `max_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. Optional.
-- `max_put_rps`, `max_put_burst`, `max_get_rps` and `max_get_burst` - Throttling settings (see description above) to use for specific endpoint instead of per query. Optional.
+- `endpoint` — Specifies prefix of an endpoint. Mandatory.
+- `access_key_id` and `secret_access_key` — Specifies credentials to use with given endpoint. Optional.
+- `use_environment_credentials` — If set to `true`, S3 client will try to obtain credentials from environment variables and [Amazon EC2](https://en.wikipedia.org/wiki/Amazon_Elastic_Compute_Cloud) metadata for given endpoint. Optional, default value is `false`.
+- `region` — Specifies S3 region name. Optional.
+- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Optional, default value is `false`.
+- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
+- `no_sign_request` - Ignore all the credentials so requests are not signed. Useful for accessing public buckets.
+- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
+- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. Optional.
+- `max_single_read_retries` — The maximum number of attempts during single read. Default value is `4`. Optional.
+- `max_put_rps`, `max_put_burst`, `max_get_rps` and `max_get_burst` - Throttling settings (see description above) to use for specific endpoint instead of per query. Optional. **Example:** @@ -191,4 +191,4 @@ CREATE TABLE big_table (name String, value UInt32) ## See also -- [s3 table function](../../../sql-reference/table-functions/s3.md) +- [s3 table function](../../../sql-reference/table-functions/s3.md) diff --git a/docs/en/engines/table-engines/integrations/sqlite.md b/docs/en/engines/table-engines/integrations/sqlite.md index ba11b73339d..20597d37a87 100644 --- a/docs/en/engines/table-engines/integrations/sqlite.md +++ b/docs/en/engines/table-engines/integrations/sqlite.md @@ -20,8 +20,8 @@ The engine allows to import and export data to SQLite and supports queries to SQ **Engine Parameters** -- `db_path` — Path to SQLite file with a database. -- `table` — Name of a table in the SQLite database. +- `db_path` — Path to SQLite file with a database. +- `table` — Name of a table in the SQLite database. ## Usage Example {#usage-example} @@ -56,5 +56,5 @@ SELECT * FROM sqlite_db.table2 ORDER BY col1; **See Also** -- [SQLite](../../../engines/database-engines/sqlite.md) engine -- [sqlite](../../../sql-reference/table-functions/sqlite.md) table function +- [SQLite](../../../engines/database-engines/sqlite.md) engine +- [sqlite](../../../sql-reference/table-functions/sqlite.md) table function diff --git a/docs/en/engines/table-engines/log-family/index.md b/docs/en/engines/table-engines/log-family/index.md index 9e671163bbf..aca24e68378 100644 --- a/docs/en/engines/table-engines/log-family/index.md +++ b/docs/en/engines/table-engines/log-family/index.md @@ -10,9 +10,9 @@ These engines were developed for scenarios when you need to quickly write many s Engines of the family: -- [StripeLog](/docs/en/engines/table-engines/log-family/stripelog.md) -- [Log](/docs/en/engines/table-engines/log-family/log.md) -- [TinyLog](/docs/en/engines/table-engines/log-family/tinylog.md) +- [StripeLog](/docs/en/engines/table-engines/log-family/stripelog.md) +- [Log](/docs/en/engines/table-engines/log-family/log.md) +- [TinyLog](/docs/en/engines/table-engines/log-family/tinylog.md) `Log` family table engines can store data to [HDFS](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-hdfs) or [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3) distributed file systems. @@ -20,21 +20,21 @@ Engines of the family: Engines: -- Store data on a disk. +- Store data on a disk. -- Append data to the end of file when writing. +- Append data to the end of file when writing. -- Support locks for concurrent data access. +- Support locks for concurrent data access. During `INSERT` queries, the table is locked, and other queries for reading and writing data both wait for the table to unlock. If there are no data writing queries, any number of data reading queries can be performed concurrently. -- Do not support [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). +- Do not support [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations). -- Do not support indexes. +- Do not support indexes. This means that `SELECT` queries for ranges of data are not efficient. -- Do not write data atomically. +- Do not write data atomically. You can get a table with corrupted data if something breaks the write operation, for example, abnormal server shutdown. 
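+
+As a quick illustration of the family (a sketch; the table name is hypothetical), a Log-family table is created and used like any other table, and it simply appends on every insert:
+
+``` sql
+CREATE TABLE log_example (message String) ENGINE = StripeLog;
+
+INSERT INTO log_example VALUES ('first row'), ('second row');
+SELECT * FROM log_example;
+```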
diff --git a/docs/en/engines/table-engines/log-family/stripelog.md b/docs/en/engines/table-engines/log-family/stripelog.md index 747713fe69a..6aaa6caf654 100644 --- a/docs/en/engines/table-engines/log-family/stripelog.md +++ b/docs/en/engines/table-engines/log-family/stripelog.md @@ -29,8 +29,8 @@ The `StripeLog` engine stores all the columns in one file. For each `INSERT` que For each table ClickHouse writes the files: -- `data.bin` — Data file. -- `index.mrk` — File with marks. Marks contain offsets for each column of each data block inserted. +- `data.bin` — Data file. +- `index.mrk` — File with marks. Marks contain offsets for each column of each data block inserted. The `StripeLog` engine does not support the `ALTER UPDATE` and `ALTER DELETE` operations. diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 6591f666244..2b8b43802ea 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -122,3 +122,7 @@ FROM test.mv_visits GROUP BY StartDate ORDER BY StartDate; ``` + +## Related Content + +- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states) diff --git a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md index 7e16f4926db..0043e1b6748 100644 --- a/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -60,7 +60,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] All of the parameters excepting `sign` have the same meaning as in `MergeTree`. -- `sign` — Name of the column with the type of row: `1` — “state” row, `-1` — “cancel” row. +- `sign` — Name of the column with the type of row: `1` — “state” row, `-1` — “cancel” row. Column Data Type — `Int8`. diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index f5b8436fdfe..edb320a2507 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -77,11 +77,11 @@ The `name` column contains the names of the partition data parts. You can use th Let’s break down the name of the part: `201901_1_9_2_11`: -- `201901` is the partition name. -- `1` is the minimum number of the data block. -- `9` is the maximum number of the data block. -- `2` is the chunk level (the depth of the merge tree it is formed from). -- `11` is the mutation version (if a part mutated) +- `201901` is the partition name. +- `1` is the minimum number of the data block. +- `9` is the maximum number of the data block. +- `2` is the chunk level (the depth of the merge tree it is formed from). +- `11` is the mutation version (if a part mutated) :::info The parts of old-type tables have the name: `20190117_20190123_2_2_0` (minimum date - maximum date - minimum block number - maximum block number - level). @@ -165,9 +165,9 @@ Performance of such a query heavily depends on the table layout. 
Because of that
The key factors for a good performance:

-- number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise query will underutilize the machine
-- partitions shouldn't be too small, so batch processing won't degenerate into row-by-row processing
-- partitions should be comparable in size, so all threads will do roughly the same amount of work
+- the number of partitions involved in the query should be sufficiently large (more than `max_threads / 2`), otherwise the query will underutilize the machine
+- partitions shouldn't be too small, so batch processing won't degenerate into row-by-row processing
+- partitions should be comparable in size, so all threads will do roughly the same amount of work

:::info
It's recommended to apply some hash function to columns in `partition by` clause in order to distribute data evenly between partitions.

@@ -175,6 +175,6 @@ It's recommended to apply some hash function to columns in `partition by` clause

Relevant settings are:

-- `allow_aggregate_partitions_independently` - controls if the use of optimisation is enabled
-- `force_aggregate_partitions_independently` - forces its use when it's applicable from the correctness standpoint, but getting disabled by internal logic that estimates its expediency
-- `max_number_of_partitions_for_independent_aggregation` - hard limit on the maximal number of partitions table could have
+- `allow_aggregate_partitions_independently` - controls whether the use of the optimisation is enabled
+- `force_aggregate_partitions_independently` - forces its use whenever it is applicable from the correctness standpoint, even when the internal logic that estimates its expediency would disable it
+- `max_number_of_partitions_for_independent_aggregation` - hard limit on the maximal number of partitions a table could have

diff --git a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md
index 9577c8dc936..03e8be13474 100644
--- a/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/graphitemergetree.md
@@ -33,19 +33,19 @@ See a detailed description of the [CREATE TABLE](../../../sql-reference/statemen

A table for the Graphite data should have the following columns for the following data:

-- Metric name (Graphite sensor). Data type: `String`.
+- Metric name (Graphite sensor). Data type: `String`.

-- Time of measuring the metric. Data type: `DateTime`.
+- Time of measuring the metric. Data type: `DateTime`.

-- Value of the metric. Data type: `Float64`.
+- Value of the metric. Data type: `Float64`.

-- Version of the metric. Data type: any numeric (ClickHouse saves the rows with the highest version or the last written if versions are the same. Other rows are deleted during the merge of data parts).
+- Version of the metric. Data type: any numeric (ClickHouse saves the rows with the highest version or the last written if versions are the same. Other rows are deleted during the merge of data parts).

The names of these columns should be set in the rollup configuration.

**GraphiteMergeTree parameters**

-- `config_section` — Name of the section in the configuration file, where are the rules of rollup set.
**Query clauses** @@ -73,7 +73,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] All of the parameters excepting `config_section` have the same meaning as in `MergeTree`. -- `config_section` — Name of the section in the configuration file, where are the rules of rollup set. +- `config_section` — Name of the section in the configuration file, where are the rules of rollup set. @@ -141,18 +141,18 @@ When processing a row, ClickHouse checks the rules in the `pattern` sections. Ea Fields for `pattern` and `default` sections: -- `rule_type` - a rule's type. It's applied only to a particular metrics. The engine use it to separate plain and tagged metrics. Optional parameter. Default value: `all`. +- `rule_type` - a rule's type. It's applied only to a particular metrics. The engine use it to separate plain and tagged metrics. Optional parameter. Default value: `all`. It's unnecessary when performance is not critical, or only one metrics type is used, e.g. plain metrics. By default only one type of rules set is created. Otherwise, if any of special types is defined, two different sets are created. One for plain metrics (root.branch.leaf) and one for tagged metrics (root.branch.leaf;tag1=value1). The default rules are ended up in both sets. Valid values: - - `all` (default) - a universal rule, used when `rule_type` is omitted. - - `plain` - a rule for plain metrics. The field `regexp` is processed as regular expression. - - `tagged` - a rule for tagged metrics (metrics are stored in DB in the format of `someName?tag1=value1&tag2=value2&tag3=value3`). Regular expression must be sorted by tags' names, first tag must be `__name__` if exists. The field `regexp` is processed as regular expression. - - `tag_list` - a rule for tagged matrics, a simple DSL for easier metric description in graphite format `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. The sorting by tags' names is unnecessary, ti will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`. -- `regexp` – A pattern for the metric name (a regular or DSL). -- `age` – The minimum age of the data in seconds. -- `precision`– How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day). -- `function` – The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. Accepted functions: min / max / any / avg. The average is calculated imprecisely, like the average of the averages. + - `all` (default) - a universal rule, used when `rule_type` is omitted. + - `plain` - a rule for plain metrics. The field `regexp` is processed as regular expression. + - `tagged` - a rule for tagged metrics (metrics are stored in DB in the format of `someName?tag1=value1&tag2=value2&tag3=value3`). Regular expression must be sorted by tags' names, first tag must be `__name__` if exists. The field `regexp` is processed as regular expression. + - `tag_list` - a rule for tagged matrics, a simple DSL for easier metric description in graphite format `someName;tag1=value1;tag2=value2`, `someName`, or `tag1=value1;tag2=value2`. The field `regexp` is translated into a `tagged` rule. The sorting by tags' names is unnecessary, ti will be done automatically. A tag's value (but not a name) can be set as a regular expression, e.g. `env=(dev|staging)`. +- `regexp` – A pattern for the metric name (a regular or DSL). 
+- `age` – The minimum age of the data in seconds.
+- `precision` – How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day).
+- `function` – The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. Accepted functions: min / max / any / avg. The average is calculated imprecisely, like the average of the averages.

### Configuration Example without rules types {#configuration-example}

diff --git a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md
index 701615495de..31f5a87a2b6 100644
--- a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md
+++ b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md
@@ -191,3 +191,7 @@ is performance. In practice, users often search for multiple terms at once. For
 '%big%'` can be evaluated directly using an inverted index by forming the union of the row id lists for terms "little" and "big". This also means that the parameter `GRANULARITY` supplied to index creation has no meaning (it may be removed from the syntax in the future).
:::
+
+## Related Content
+
+- Blog: [Introducing Inverted Indices in ClickHouse](https://clickhouse.com/blog/clickhouse-search-with-inverted-indices)

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index 95efe42c757..d5189d4b9d9 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -12,19 +12,19 @@ Engines in the `MergeTree` family are designed for inserting a very large amount

Main features:

-- Stores data sorted by primary key.
+- Stores data sorted by primary key.

  This allows you to create a small sparse index that helps find data faster.

-- Partitions can be used if the [partitioning key](/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified.
+- Partitions can be used if the [partitioning key](/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified.

  ClickHouse supports certain operations with partitions that are more efficient than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query.

-- Data replication support.
+- Data replication support.

  The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](/docs/en/engines/table-engines/mergetree-family/replication.md).

-- Data sampling support.
+- Data sampling support.

  If necessary, you can set the data sampling method in the table.

@@ -207,10 +207,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

**MergeTree() Parameters**

-- `date-column` — The name of a column of the [Date](/docs/en/sql-reference/data-types/date.md) type. ClickHouse automatically creates partitions by month based on this column. The partition names are in the `"YYYYMM"` format.
-- `sampling_expression` — An expression for sampling.
-- `(primary, key)` — Primary key. Type: [Tuple()](/docs/en/sql-reference/data-types/tuple.md)
-- `index_granularity` — The granularity of an index. The number of data rows between the “marks” of an index. The value 8192 is appropriate for most tasks.
+- `date-column` — The name of a column of the [Date](/docs/en/sql-reference/data-types/date.md) type. ClickHouse automatically creates partitions by month based on this column. The partition names are in the `"YYYYMM"` format. +- `sampling_expression` — An expression for sampling. +- `(primary, key)` — Primary key. Type: [Tuple()](/docs/en/sql-reference/data-types/tuple.md) +- `index_granularity` — The granularity of an index. The number of data rows between the “marks” of an index. The value 8192 is appropriate for most tasks. **Example** @@ -250,9 +250,9 @@ Take the `(CounterID, Date)` primary key as an example. In this case, the sortin If the data query specifies: -- `CounterID in ('a', 'h')`, the server reads the data in the ranges of marks `[0, 3)` and `[6, 8)`. -- `CounterID IN ('a', 'h') AND Date = 3`, the server reads the data in the ranges of marks `[1, 3)` and `[7, 8)`. -- `Date = 3`, the server reads the data in the range of marks `[1, 10]`. +- `CounterID in ('a', 'h')`, the server reads the data in the ranges of marks `[0, 3)` and `[6, 8)`. +- `CounterID IN ('a', 'h') AND Date = 3`, the server reads the data in the ranges of marks `[1, 3)` and `[7, 8)`. +- `Date = 3`, the server reads the data in the range of marks `[1, 10]`. The examples above show that it is always more effective to use an index than a full scan. @@ -268,18 +268,18 @@ You can use `Nullable`-typed expressions in the `PRIMARY KEY` and `ORDER BY` cla The number of columns in the primary key is not explicitly limited. Depending on the data structure, you can include more or fewer columns in the primary key. This may: -- Improve the performance of an index. +- Improve the performance of an index. If the primary key is `(a, b)`, then adding another column `c` will improve the performance if the following conditions are met: - - There are queries with a condition on column `c`. - - Long data ranges (several times longer than the `index_granularity`) with identical values for `(a, b)` are common. In other words, when adding another column allows you to skip quite long data ranges. + - There are queries with a condition on column `c`. + - Long data ranges (several times longer than the `index_granularity`) with identical values for `(a, b)` are common. In other words, when adding another column allows you to skip quite long data ranges. -- Improve data compression. +- Improve data compression. ClickHouse sorts data by primary key, so the higher the consistency, the better the compression. -- Provide additional logic when merging data parts in the [CollapsingMergeTree](/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md/#table_engine-collapsingmergetree) and [SummingMergeTree](/docs/en/engines/table-engines/mergetree-family/summingmergetree.md) engines. +- Provide additional logic when merging data parts in the [CollapsingMergeTree](/docs/en/engines/table-engines/mergetree-family/collapsingmergetree.md/#table_engine-collapsingmergetree) and [SummingMergeTree](/docs/en/engines/table-engines/mergetree-family/summingmergetree.md) engines. In this case it makes sense to specify the *sorting key* that is different from the primary key. @@ -483,25 +483,25 @@ Indexes of type `set` can be utilized by all functions. The other index types ar Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization. 
-(*) For `hasTokenCaseInsensitve` and `hasTokenCaseInsensitive` to be effective, the `tokenbf_v1` index must be created on lowercased data, for example `INDEX idx (lower(str_col)) TYPE tokenbf_v1(512, 3, 0)`.
+(*) For `hasTokenCaseInsensitive` and `hasTokenCaseInsensitiveOrNull` to be effective, the `tokenbf_v1` index must be created on lowercased data, for example `INDEX idx (lower(str_col)) TYPE tokenbf_v1(512, 3, 0)`.

:::note
Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can not be used for optimizing queries where the result of a function is expected to be false.

For example:

-- Can be optimized:
- - `s LIKE '%test%'`
- - `NOT s NOT LIKE '%test%'`
- - `s = 1`
- - `NOT s != 1`
- - `startsWith(s, 'test')`
-- Can not be optimized:
- - `NOT s LIKE '%test%'`
- - `s NOT LIKE '%test%'`
- - `NOT s = 1`
- - `s != 1`
- - `NOT startsWith(s, 'test')`
+- Can be optimized:
+ - `s LIKE '%test%'`
+ - `NOT s NOT LIKE '%test%'`
+ - `s = 1`
+ - `NOT s != 1`
+ - `startsWith(s, 'test')`
+- Can not be optimized:
+ - `NOT s LIKE '%test%'`
+ - `s NOT LIKE '%test%'`
+ - `NOT s = 1`
+ - `s != 1`
+ - `NOT startsWith(s, 'test')`
:::

@@ -614,11 +614,11 @@ TTL expr

Type of TTL rule may follow each TTL expression. It affects an action which is to be done once the expression is satisfied (reaches current time):

-- `DELETE` - delete expired rows (default action);
-- `RECOMPRESS codec_name` - recompress data part with the `codec_name`;
-- `TO DISK 'aaa'` - move part to the disk `aaa`;
-- `TO VOLUME 'bbb'` - move part to the disk `bbb`;
-- `GROUP BY` - aggregate expired rows.
+- `DELETE` - delete expired rows (default action);
+- `RECOMPRESS codec_name` - recompress data part with the `codec_name`;
+- `TO DISK 'aaa'` - move part to the disk `aaa`;
+- `TO VOLUME 'bbb'` - move part to the disk `bbb`;
+- `GROUP BY` - aggregate expired rows.

`DELETE` action can be used together with `WHERE` clause to delete only some of the expired rows based on a filtering condition:
``` sql

@@ -722,10 +722,10 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be

### Terms {#terms}

-- Disk — Block device mounted to the filesystem.
-- Default disk — Disk that stores the path specified in the [path](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-path) server setting.
-- Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)).
-- Storage policy — Set of volumes and the rules for moving data between them.
+- Disk — Block device mounted to the filesystem.
+- Default disk — Disk that stores the path specified in the [path](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-path) server setting.
+- Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)).
+- Storage policy — Set of volumes and the rules for moving data between them.

The names given to the described entities can be found in the system tables, [system.storage_policies](/docs/en/operations/system-tables/storage_policies.md/#system_tables-storage_policies) and [system.disks](/docs/en/operations/system-tables/disks.md/#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.

@@ -759,9 +759,9 @@ Configuration structure:

Tags:

-- `<disk_name_N>` — Disk name. Names must be different for all disks.
-- `path` — path under which a server will store data (`data` and `shadow` folders), should be terminated with ‘/’.
-- `keep_free_space_bytes` — the amount of free disk space to be reserved.
+- `<disk_name_N>` — Disk name. Names must be different for all disks.
+- `path` — path under which a server will store data (`data` and `shadow` folders), should be terminated with ‘/’.
+- `keep_free_space_bytes` — the amount of free disk space to be reserved.

The order of the disk definition is not important.

@@ -797,14 +797,14 @@ Storage policies configuration markup:

Tags:

-- `policy_name_N` — Policy name. Policy names must be unique.
-- `volume_name_N` — Volume name. Volume names must be unique.
-- `disk` — a disk within a volume.
-- `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the a size of a merged part estimated to be bigger than `max_data_part_size_bytes` then this part will be written to a next volume. Basically this feature allows to keep new/small parts on a hot (SSD) volume and move them to a cold (HDD) volume when they reach large size. Do not use this setting if your policy has only one volume.
-- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved.
-- `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
-- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default if we insert a data part that already expired by the TTL move rule it immediately goes to a volume/disk declared in move rule. This can significantly slowdown insert in case if destination volume/disk is slow (e.g. S3).
-- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.
+- `policy_name_N` — Policy name. Policy names must be unique.
+- `volume_name_N` — Volume name. Volume names must be unique.
+- `disk` — a disk within a volume.
+- `max_data_part_size_bytes` — the maximum size of a part that can be stored on any of the volume’s disks. If the size of a merged part is estimated to be bigger than `max_data_part_size_bytes`, then this part will be written to the next volume. Basically, this feature allows keeping new/small parts on a hot (SSD) volume and moving them to a cold (HDD) volume when they reach a large size. Do not use this setting if your policy has only one volume.
+- `move_factor` — when the amount of available space gets lower than this factor, data automatically starts to move on the next volume if any (by default, 0.1). ClickHouse sorts existing parts by size from largest to smallest (in descending order) and selects parts with the total size that is sufficient to meet the `move_factor` condition. If the total size of all parts is insufficient, all parts will be moved.
+- `prefer_not_to_merge` — Disables merging of data parts on this volume. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
+- `perform_ttl_move_on_insert` — Disables TTL move on data part INSERT. By default, if we insert a data part that has already expired by the TTL move rule, it immediately goes to the volume/disk declared in the move rule. This can significantly slow down inserts if the destination volume/disk is slow (e.g. S3).
+- `load_balancing` - Policy for disk balancing, `round_robin` or `least_used`.
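+
+For example, a table opts into one of the configured policies at creation time. This is a sketch; `moving_from_ssd_to_hdd` is an assumed policy name from the configuration:
+
+``` sql
+CREATE TABLE tiered_table (d Date, s String)
+ENGINE = MergeTree
+ORDER BY d
+SETTINGS storage_policy = 'moving_from_ssd_to_hdd';
+```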
Cofiguration examples:

@@ -880,10 +880,10 @@ The number of threads performing background moves of data parts can be changed b

In the case of `MergeTree` tables, data is getting to disk in different ways:

-- As a result of an insert (`INSERT` query).
-- During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations).
-- When downloading from another replica.
-- As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition).
+- As a result of an insert (`INSERT` query).
+- During background merges and [mutations](/docs/en/sql-reference/statements/alter/index.md#alter-mutations).
+- When downloading from another replica.
+- As a result of partition freezing [ALTER TABLE … FREEZE PARTITION](/docs/en/sql-reference/statements/alter/partition.md/#alter_freeze-partition).

In all these cases except for mutations and partition freezing, a part is stored on a volume and a disk according to the given storage policy:

@@ -956,30 +956,30 @@ ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [

Required parameters:

-- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
-- `access_key_id` — S3 access key id.
-- `secret_access_key` — S3 secret access key.
+- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
+- `access_key_id` — S3 access key id.
+- `secret_access_key` — S3 secret access key.

Optional parameters:

-- `region` — S3 region name.
-- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS) as GCS does not support batch deletes and preventing the checks will prevent error messages in the logs.
-- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
-- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
-- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
-- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
-- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
-- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
-- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
-- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
-- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
-- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`.
-- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
-- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
-- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
-- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
-- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
-- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
+- `region` — S3 region name.
+- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS), as GCS does not support batch deletes; disabling the check prevents error messages in the logs.
+- `use_environment_credentials` — Reads AWS credentials from the environment variables AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
+- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
+- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
+- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
+- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
+- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
+- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
+- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
+- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
+- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`.
+- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
+- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
+- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
+- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
+- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
+- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.

### Configuring the cache

@@ -994,12 +994,12 @@ This is the cache configuration from above:
```

These parameters define the cache layer:

-- `type` — If a disk is of type `cache` it caches mark and index files in memory.
-- `disk` — The name of the disk that will be cached.
+- `type` — If a disk is of type `cache` it caches mark and index files in memory.
+- `disk` — The name of the disk that will be cached.

Cache parameters:

-- `path` — The path where metadata for the cache is stored.
-- `max_size` — The size (amount of memory) that the cache can grow to.
+- `path` — The path where metadata for the cache is stored.
+- `max_size` — The size (amount of memory) that the cache can grow to.

:::tip
There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details.
:::

@@ -1100,9 +1100,9 @@ Examples of working configurations can be found in integration tests directory (

## Virtual Columns {#virtual-columns}

-- `_part` — Name of a part.
-- `_part_index` — Sequential index of the part in the query result.
-- `_partition_id` — Name of a partition.
-- `_part_uuid` — Unique part identifier (if enabled MergeTree setting `assign_part_uuids`).
-- `_partition_value` — Values (a tuple) of a `partition by` expression.
-- `_sample_factor` — Sample factor (from the query).
+- `_part` — Name of a part.
+- `_part_index` — Sequential index of the part in the query result.
+- `_partition_id` — Name of a partition.
+- `_part_uuid` — Unique part identifier (if enabled MergeTree setting `assign_part_uuids`).
+- `_partition_value` — Values (a tuple) of a `partition by` expression.
+- `_sample_factor` — Sample factor (from the query).

diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md
index 8351a31db55..81d8cc2d3ca 100644
--- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md
@@ -20,12 +20,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
 name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
 ...
-) ENGINE = ReplacingMergeTree([ver])
+) ENGINE = ReplacingMergeTree([ver [, is_deleted]])
[PARTITION BY expr]
[ORDER BY expr]
[PRIMARY KEY expr]
[SAMPLE BY expr]
-[SETTINGS name=value, ...]
+[SETTINGS name=value, clean_deleted_rows=value, ...]
```

For a description of request parameters, see [statement description](../../../sql-reference/statements/create/table.md).
@@ -88,6 +88,18 @@ SELECT * FROM mySecondReplacingMT FINAL;
 └─────┴─────────┴─────────────────────┘
```

+### is_deleted
+
+`is_deleted` — Name of the column with the type of row: `1` is a “deleted” row, `0` is a “state” row.
+
+ Column data type — `Int8`.
+
+ Can only be enabled when `ver` is used.
+ The row is deleted when using `OPTIMIZE ... FINAL CLEANUP`, or `OPTIMIZE ... FINAL` if the engine setting `clean_deleted_rows` has been set to `Always`.
+ Whatever the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted one is the one kept.
+
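+A minimal sketch of the behaviour (the table and column names here are hypothetical, invented for illustration):
+
+``` sql
+CREATE TABLE demo_rmt
+(
+    key Int64,
+    value String,
+    version UInt64,
+    is_deleted Int8
+)
+ENGINE = ReplacingMergeTree(version, is_deleted)
+ORDER BY key;
+
+-- the second insert carries a higher version and marks the row as deleted
+INSERT INTO demo_rmt VALUES (1, 'hello', 1, 0);
+INSERT INTO demo_rmt VALUES (1, 'hello', 2, 1);
+
+OPTIMIZE TABLE demo_rmt FINAL CLEANUP;
+
+SELECT * FROM demo_rmt; -- the row with key = 1 is gone
+```
+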
## Query clauses

When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.

@@ -111,6 +123,6 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

All of the parameters excepting `ver` have the same meaning as in `MergeTree`.

-- `ver` - column with the version. Optional parameter.
For a description, see the text above. diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index e14ba5699e4..01782ac25bd 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -26,13 +26,13 @@ ENGINE = ReplicatedReplacingMergeTree Replication is only supported for tables in the MergeTree family: -- ReplicatedMergeTree -- ReplicatedSummingMergeTree -- ReplicatedReplacingMergeTree -- ReplicatedAggregatingMergeTree -- ReplicatedCollapsingMergeTree -- ReplicatedVersionedCollapsingMergeTree -- ReplicatedGraphiteMergeTree +- ReplicatedMergeTree +- ReplicatedSummingMergeTree +- ReplicatedReplacingMergeTree +- ReplicatedAggregatingMergeTree +- ReplicatedCollapsingMergeTree +- ReplicatedVersionedCollapsingMergeTree +- ReplicatedGraphiteMergeTree Replication works at the level of an individual table, not the entire server. A server can store both replicated and non-replicated tables at the same time. @@ -42,9 +42,9 @@ Compressed data for `INSERT` and `ALTER` queries is replicated (for more informa `CREATE`, `DROP`, `ATTACH`, `DETACH` and `RENAME` queries are executed on a single server and are not replicated: -- The `CREATE TABLE` query creates a new replicatable table on the server where the query is run. If this table already exists on other servers, it adds a new replica. -- The `DROP TABLE` query deletes the replica located on the server where the query is run. -- The `RENAME` query renames the table on one of the replicas. In other words, replicated tables can have different names on different replicas. +- The `CREATE TABLE` query creates a new replicatable table on the server where the query is run. If this table already exists on other servers, it adds a new replica. +- The `DROP TABLE` query deletes the replica located on the server where the query is run. +- The `RENAME` query renames the table on one of the replicas. In other words, replicated tables can have different names on different replicas. ClickHouse uses [ClickHouse Keeper](/docs/en/guides/sre/keeper/index.md) for storing replicas meta information. It is possible to use ZooKeeper version 3.4.5 or newer, but ClickHouse Keeper is recommended. @@ -316,8 +316,8 @@ Create a MergeTree table with a different name. Move all the data from the direc If you want to get rid of a `ReplicatedMergeTree` table without launching the server: -- Delete the corresponding `.sql` file in the metadata directory (`/var/lib/clickhouse/metadata/`). -- Delete the corresponding path in ClickHouse Keeper (`/path_to_table/replica_name`). +- Delete the corresponding `.sql` file in the metadata directory (`/var/lib/clickhouse/metadata/`). +- Delete the corresponding path in ClickHouse Keeper (`/path_to_table/replica_name`). After this, you can launch the server, create a `MergeTree` table, move the data to its directory, and then restart the server. 
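+
+As an illustration of that conversion step (a sketch; the table and column names are hypothetical, and the structure must match the original replicated table):
+
+``` sql
+CREATE TABLE events_unreplicated
+(
+    d Date,
+    id UInt64,
+    payload String
+)
+ENGINE = MergeTree
+ORDER BY (d, id);
+```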
@@ -327,8 +327,8 @@ If the data in ClickHouse Keeper was lost or damaged, you can save data by movin

**See Also**

-- [background_schedule_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_schedule_pool_size)
-- [background_fetches_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_fetches_pool_size)
-- [execute_merges_on_single_replica_time_threshold](/docs/en/operations/settings/settings.md/#execute-merges-on-single-replica-time-threshold)
-- [max_replicated_fetches_network_bandwidth](/docs/en/operations/settings/merge-tree-settings.md/#max_replicated_fetches_network_bandwidth)
-- [max_replicated_sends_network_bandwidth](/docs/en/operations/settings/merge-tree-settings.md/#max_replicated_sends_network_bandwidth)
+- [background_schedule_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_schedule_pool_size)
+- [background_fetches_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_fetches_pool_size)
+- [execute_merges_on_single_replica_time_threshold](/docs/en/operations/settings/settings.md/#execute-merges-on-single-replica-time-threshold)
+- [max_replicated_fetches_network_bandwidth](/docs/en/operations/settings/merge-tree-settings.md/#max_replicated_fetches_network_bandwidth)
+- [max_replicated_sends_network_bandwidth](/docs/en/operations/settings/merge-tree-settings.md/#max_replicated_sends_network_bandwidth)
diff --git a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md
index d0078656b5d..c3cbb0d2a03 100644
--- a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md
@@ -59,7 +59,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

All of the parameters excepting `columns` have the same meaning as in `MergeTree`.

-- `columns` — tuple with names of columns values of which will be summarized. Optional parameter. For a description, see the text above.
+- `columns` — tuple with the names of columns whose values will be summarized. Optional parameter. For a description, see the text above.

@@ -122,8 +122,8 @@ Table can have nested data structures that are processed in a special way.

If the name of a nested table ends with `Map` and it contains at least two columns that meet the following criteria:

-- the first column is numeric `(*Int*, Date, DateTime)` or a string `(String, FixedString)`, let’s call it `key`,
-- the other columns are arithmetic `(*Int*, Float32/64)`, let’s call it `(values...)`,
+- the first column is numeric `(*Int*, Date, DateTime)` or a string `(String, FixedString)`, let’s call it `key`,
+- the other columns are arithmetic `(*Int*, Float32/64)`, let’s call them `(values...)`,

then this nested table is interpreted as a mapping of `key => (values...)`, and when merging its rows, the elements of two data sets are merged by `key` with a summation of the corresponding `(values...)`.

@@ -186,3 +186,7 @@ ARRAY JOIN

When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference/summap.md) function for aggregation of `Map`.

For nested data structure, you do not need to specify its columns in the tuple of columns for summation.
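+As a short sketch of the nested-`Map` summation described above (the table name and data are illustrative):
+
+```sql
+CREATE TABLE nested_map_demo
+(
+    key UInt32,
+    StatusMap Nested(
+        Status UInt16,
+        Requests UInt64
+    )
+) ENGINE = SummingMergeTree
+ORDER BY key;
+
+INSERT INTO nested_map_demo VALUES (1, [1, 2], [10, 10]), (1, [1, 3], [5, 7]);
+
+-- Elements are merged by Status with a summation of the corresponding Requests:
+SELECT key, sumMap(StatusMap.Status, StatusMap.Requests) FROM nested_map_demo GROUP BY key;
+```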
+ +## Related Content + +- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states) diff --git a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 74ac9c97fc0..22a15c0e15e 100644 --- a/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -8,8 +8,8 @@ sidebar_label: VersionedCollapsingMergeTree This engine: -- Allows quick writing of object states that are continually changing. -- Deletes old object states in the background. This significantly reduces the volume of storage. +- Allows quick writing of object states that are continually changing. +- Deletes old object states in the background. This significantly reduces the volume of storage. See the section [Collapsing](#table_engines_versionedcollapsingmergetree) for details. @@ -73,11 +73,11 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] All of the parameters except `sign` and `version` have the same meaning as in `MergeTree`. -- `sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row. +- `sign` — Name of the column with the type of row: `1` is a “state” row, `-1` is a “cancel” row. Column Data Type — `Int8`. -- `version` — Name of the column with the version of the object state. +- `version` — Name of the column with the version of the object state. The column data type should be `UInt*`. diff --git a/docs/en/engines/table-engines/special/dictionary.md b/docs/en/engines/table-engines/special/dictionary.md index 05d07d94e56..3517ebfcdc6 100644 --- a/docs/en/engines/table-engines/special/dictionary.md +++ b/docs/en/engines/table-engines/special/dictionary.md @@ -97,4 +97,4 @@ select * from products limit 1; **See Also** -- [Dictionary function](../../../sql-reference/table-functions/dictionary.md#dictionary-function) +- [Dictionary function](../../../sql-reference/table-functions/dictionary.md#dictionary-function) diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index d13453bc08f..bb97e56c1cc 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -229,8 +229,8 @@ A simple remainder from the division is a limited solution for sharding and isn You should be concerned about the sharding scheme in the following cases: -- Queries are used that require joining data (`IN` or `JOIN`) by a specific key. If data is sharded by this key, you can use local `IN` or `JOIN` instead of `GLOBAL IN` or `GLOBAL JOIN`, which is much more efficient. -- A large number of servers is used (hundreds or more) with a large number of small queries, for example, queries for data of individual clients (e.g. websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. 
`Distributed` tables are created for each layer, and a single shared distributed table is created for global queries. +- Queries are used that require joining data (`IN` or `JOIN`) by a specific key. If data is sharded by this key, you can use local `IN` or `JOIN` instead of `GLOBAL IN` or `GLOBAL JOIN`, which is much more efficient. +- A large number of servers is used (hundreds or more) with a large number of small queries, for example, queries for data of individual clients (e.g. websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. `Distributed` tables are created for each layer, and a single shared distributed table is created for global queries. Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The periodicity for sending data is managed by the [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting. @@ -256,6 +256,6 @@ Since [remote](../../../sql-reference/table-functions/remote.md) and [cluster](. 
**See Also**

-- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) description
-- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting
-- [shardNum()](../../../sql-reference/functions/other-functions.md#shard-num) and [shardCount()](../../../sql-reference/functions/other-functions.md#shard-count) functions
+- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns) description
+- [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting
+- [shardNum()](../../../sql-reference/functions/other-functions.md#shard-num) and [shardCount()](../../../sql-reference/functions/other-functions.md#shard-count) functions
diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md
index e99b1c83cbc..9c4e87487b4 100644
--- a/docs/en/engines/table-engines/special/file.md
+++ b/docs/en/engines/table-engines/special/file.md
@@ -10,9 +10,9 @@ The File table engine keeps the data in a file in one of the supported [file for

Usage scenarios:

-- Data export from ClickHouse to file.
-- Convert data from one format to another.
-- Updating data in ClickHouse via editing a file on a disk.
+- Data export from ClickHouse to a file.
+- Converting data from one format to another.
+- Updating data in ClickHouse by editing a file on disk.

## Usage in ClickHouse Server {#usage-in-clickhouse-server}

@@ -78,14 +78,14 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64

## Details of Implementation {#details-of-implementation}

-- Multiple `SELECT` queries can be performed concurrently, but `INSERT` queries will wait each other.
-- Supported creating new file by `INSERT` query.
-- If file exists, `INSERT` would append new values in it.
-- Not supported:
-  - `ALTER`
-  - `SELECT ... SAMPLE`
-  - Indices
-  - Replication
+- Multiple `SELECT` queries can be performed concurrently, but `INSERT` queries will wait for each other.
+- Creating a new file by an `INSERT` query is supported.
+- If the file exists, `INSERT` appends new values to it.
+- Not supported:
+  - `ALTER`
+  - `SELECT ... SAMPLE`
+  - Indices
+  - Replication

## PARTITION BY

diff --git a/docs/en/engines/table-engines/special/generate.md b/docs/en/engines/table-engines/special/generate.md
index 714afe3c3b5..9fcdb47e555 100644
--- a/docs/en/engines/table-engines/special/generate.md
+++ b/docs/en/engines/table-engines/special/generate.md
@@ -9,8 +9,8 @@ The GenerateRandom table engine produces random data for given table schema.

Usage examples:

-- Use in test to populate reproducible large table.
-- Generate random input for fuzzing tests.
+- Use in tests to populate a reproducible large table.
+- Generate random input for fuzzing tests.

## Usage in ClickHouse Server {#usage-in-clickhouse-server}

@@ -49,9 +49,9 @@ SELECT * FROM generate_engine_table LIMIT 3

## Details of Implementation {#details-of-implementation}

-- Not supported:
-  - `ALTER`
-  - `SELECT ... SAMPLE`
-  - `INSERT`
-  - Indices
-  - Replication
+- Not supported:
+  - `ALTER`
+  - `SELECT ...
SAMPLE` + - `INSERT` + - Indices + - Replication diff --git a/docs/en/engines/table-engines/special/index.md b/docs/en/engines/table-engines/special/index.md index 2247aeae5af..a1c5056de22 100644 --- a/docs/en/engines/table-engines/special/index.md +++ b/docs/en/engines/table-engines/special/index.md @@ -8,8 +8,8 @@ sidebar_label: Special There are three main categories of table engines: -- [MergeTree engine family](../../../engines/table-engines/mergetree-family/index.md) for main production use. -- [Log engine family](../../../engines/table-engines/log-family/index.md) for small temporary data. -- [Table engines for integrations](../../../engines/table-engines/integrations/index.md). +- [MergeTree engine family](../../../engines/table-engines/mergetree-family/index.md) for main production use. +- [Log engine family](../../../engines/table-engines/log-family/index.md) for small temporary data. +- [Table engines for integrations](../../../engines/table-engines/integrations/index.md). The remaining engines are unique in their purpose and are not grouped into families yet, thus they are placed in this “special” category. diff --git a/docs/en/engines/table-engines/special/join.md b/docs/en/engines/table-engines/special/join.md index a7cc525dd6c..e9f0678beee 100644 --- a/docs/en/engines/table-engines/special/join.md +++ b/docs/en/engines/table-engines/special/join.md @@ -54,8 +54,8 @@ You can use `INSERT` queries to add data to the `Join`-engine tables. If the tab Main use-cases for `Join`-engine tables are following: -- Place the table to the right side in a `JOIN` clause. -- Call the [joinGet](/docs/en/sql-reference/functions/other-functions.md/#joinget) function, which lets you extract data from the table the same way as from a dictionary. +- Place the table to the right side in a `JOIN` clause. +- Call the [joinGet](/docs/en/sql-reference/functions/other-functions.md/#joinget) function, which lets you extract data from the table the same way as from a dictionary. ### Deleting Data {#deleting-data} diff --git a/docs/en/engines/table-engines/special/keepermap.md b/docs/en/engines/table-engines/special/keepermap.md index e5c4dea2339..a1c7009b712 100644 --- a/docs/en/engines/table-engines/special/keepermap.md +++ b/docs/en/engines/table-engines/special/keepermap.md @@ -112,3 +112,7 @@ If setting `keeper_map_strict_mode` is set to `true`, fetching and updating data ```sql ALTER TABLE keeper_map_table UPDATE v1 = v1 * 10 + 2 WHERE key LIKE 'some%' AND v3 > 3.1; ``` + +## Related content + +- Blog: [Building a Real-time Analytics Apps with ClickHouse and Hex](https://clickhouse.com/blog/building-real-time-applications-with-clickhouse-and-hex-notebook-keeper-engine) diff --git a/docs/en/engines/table-engines/special/merge.md b/docs/en/engines/table-engines/special/merge.md index ccdb5b5fad7..bd6149406a9 100644 --- a/docs/en/engines/table-engines/special/merge.md +++ b/docs/en/engines/table-engines/special/merge.md @@ -78,11 +78,11 @@ SELECT * FROM WatchLog; ## Virtual Columns {#virtual-columns} -- `_table` — Contains the name of the table from which data was read. Type: [String](../../../sql-reference/data-types/string.md). +- `_table` — Contains the name of the table from which data was read. Type: [String](../../../sql-reference/data-types/string.md). You can set the constant conditions on `_table` in the `WHERE/PREWHERE` clause (for example, `WHERE _table='xyz'`). 
In this case the read operation is performed only for that tables where the condition on `_table` is satisfied, so the `_table` column acts as an index. **See Also** -- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns) -- [merge](../../../sql-reference/table-functions/merge.md) table function +- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns) +- [merge](../../../sql-reference/table-functions/merge.md) table function diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index af8a80c75b0..c2a8d9ce8bf 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -91,11 +91,11 @@ SELECT * FROM url_engine_table ## Details of Implementation {#details-of-implementation} -- Reads and writes can be parallel -- Not supported: - - `ALTER` and `SELECT...SAMPLE` operations. - - Indexes. - - Replication. +- Reads and writes can be parallel +- Not supported: + - `ALTER` and `SELECT...SAMPLE` operations. + - Indexes. + - Replication. ## PARTITION BY diff --git a/docs/en/getting-started/example-datasets/amazon-reviews.md b/docs/en/getting-started/example-datasets/amazon-reviews.md index 55014d9b5f4..f35806aa66f 100644 --- a/docs/en/getting-started/example-datasets/amazon-reviews.md +++ b/docs/en/getting-started/example-datasets/amazon-reviews.md @@ -43,7 +43,7 @@ The rows look like: ```response ┌─marketplace─┬─customer_id─┬─review_id──────┬─product_id─┬─product_parent─┬─product_title──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─product_category─┬─star_rating─┬─helpful_votes─┬─total_votes─┬─vine──┬─verified_purchase─┬─review_headline───────────┬─review_body────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─review_date─┐ -│ US │ 16414143 │ R3W4P9UBGNGH1U │ B00YL0EKWE │ 852431543 │ LG G4 Case Hard Transparent Slim Clear Cover for LG G4 │ Wireless │ 2 │ 1 │ 3 │ false │ true │ Looks good, functions meh │ 2 issues - Once I turned on the circle apps and installed this case, my battery drained twice as fast as usual. I ended up turning off the circle apps, which kind of makes the case just a case... with a hole in it. Second, the wireless charging doesn't work. I have a Motorola 360 watch and a Qi charging pad. The watch charges fine but this case doesn't. But hey, it looks nice. │ 2015-08-31 │ +│ US │ 16414143 │ R3W4P9UBGNGH1U │ B00YL0EKWE │ 852431543 │ LG G4 Case Hard Transparent Slim Clear Cover for LG G4 │ Wireless │ 2 │ 1 │ 3 │ false │ true │ Looks good, functions meh │ 2 issues - Once I turned on the circle apps and installed this case, my battery drained twice as fast as usual. I ended up turning off the circle apps, which kind of makes the case just a case... with a hole in it. Second, the wireless charging doesn't work. I have a Motorola 360 watch and a Qi charging pad. The watch charges fine but this case doesn't. But hey, it looks nice. 
│ 2015-08-31 │ │ US │ 50800750 │ R15V54KBMTQWAY │ B00XK95RPQ │ 516894650 │ Selfie Stick Fiblastiq™ Extendable Wireless Bluetooth Selfie Stick with built-in Bluetooth Adjustable Phone Holder │ Wireless │ 4 │ 0 │ 0 │ false │ false │ A fun little gadget │ I’m embarrassed to admit that until recently, I have had a very negative opinion about “selfie sticks” aka “monopods” aka “narcissticks.” But having reviewed a number of them recently, they’re growing on me. This one is pretty nice and simple to set up and with easy instructions illustrated on the back of the box (not sure why some reviewers have stated that there are no instructions when they are clearly printed on the box unless they received different packaging than I did). Once assembled, the pairing via bluetooth and use of the stick are easy and intuitive. Nothing to it.

The stick comes with a USB charging cable but arrived with a charge so you can use it immediately, though it’s probably a good idea to charge it right away so that you have no interruption of use out of the box. Make sure the stick is switched to on (it will light up) and extend your stick to the length you desire up to about a yard’s length and snap away.

The phone clamp held the phone sturdily so I wasn’t worried about it slipping out. But the longer you extend the stick, the harder it is to maneuver. But that will happen with any stick and is not specific to this one in particular.

Two things that could improve this: 1) add the option to clamp this in portrait orientation instead of having to try and hold the stick at the portrait angle, which makes it feel unstable; 2) add the opening for a tripod so that this can be used to sit upright on a table for skyping and facetime eliminating the need to hold the phone up with your hand, causing fatigue.

But other than that, this is a nice quality monopod for a variety of picture taking opportunities.

I received a sample in exchange for my honest opinion. │ 2015-08-31 │ │ US │ 15184378 │ RY8I449HNXSVF │ B00SXRXUKO │ 984297154 │ Tribe AB40 Water Resistant Sports Armband with Key Holder for 4.7-Inch iPhone 6S/6/5/5S/5C, Galaxy S4 + Screen Protector - Dark Pink │ Wireless │ 5 │ 0 │ 0 │ false │ true │ Five Stars │ Fits iPhone 6 well │ 2015-08-31 │ │ US │ 10203548 │ R18TLJYCKJFLSR │ B009V5X1CE │ 279912704 │ RAVPower® Element 10400mAh External Battery USB Portable Charger (Dual USB Outputs, Ultra Compact Design), Travel Charger for iPhone 6,iPhone 6 plus,iPhone 5, 5S, 5C, 4S, 4, iPad Air, 4, 3, 2, Mini 2 (Apple adapters not included); Samsung Galaxy S5, S4, S3, S2, Note 3, Note 2; HTC One, EVO, Thunderbolt, Incredible, Droid DNA, Motorola ATRIX, Droid, Moto X, Google Glass, Nexus 4, Nexus 5, Nexus 7, │ Wireless │ 5 │ 0 │ 0 │ false │ true │ Great charger │ Great charger. I easily get 3+ charges on a Samsung Galaxy 3. Works perfectly for camping trips or long days on the boat. │ 2015-08-31 │ diff --git a/docs/en/getting-started/example-datasets/github.md b/docs/en/getting-started/example-datasets/github.md index e18c7dec1a6..02965ed5e33 100644 --- a/docs/en/getting-started/example-datasets/github.md +++ b/docs/en/getting-started/example-datasets/github.md @@ -2499,7 +2499,9 @@ LIMIT 20 We welcome exact and improved solutions here. -# Related Content +## Related Content -- [Git commits and our community](https://clickhouse.com/blog/clickhouse-git-community-commits) -- [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits) +- Blog: [Git commits and our community](https://clickhouse.com/blog/clickhouse-git-community-commits) +- Blog: [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits) +- Blog: [Building a Real-time Analytics Apps with ClickHouse and Hex](https://clickhouse.com/blog/building-real-time-applications-with-clickhouse-and-hex-notebook-keeper-engine) +- Blog: [A Story of Open-source GitHub Activity using ClickHouse + Grafana](https://clickhouse.com/blog/introduction-to-clickhouse-and-grafana-webinar) diff --git a/docs/en/getting-started/example-datasets/nypd_complaint_data.md b/docs/en/getting-started/example-datasets/nypd_complaint_data.md index 154cfa78e53..a178fe456a6 100644 --- a/docs/en/getting-started/example-datasets/nypd_complaint_data.md +++ b/docs/en/getting-started/example-datasets/nypd_complaint_data.md @@ -380,7 +380,7 @@ decide that we would look at the types of crimes reported over time in the five New York City. These fields might be then included in the `ORDER BY`: | Column | Description (from the data dictionary) | -| ----------- | --------------------------------------------------- | +| ----------- | --------------------------------------------------- | | OFNS_DESC | Description of offense corresponding with key code | | RPT_DT | Date event was reported to police | | BORO_NM | The name of the borough in which the incident occurred | diff --git a/docs/en/getting-started/example-datasets/ontime.md b/docs/en/getting-started/example-datasets/ontime.md index 7a6e78206b9..9efa1afb5c4 100644 --- a/docs/en/getting-started/example-datasets/ontime.md +++ b/docs/en/getting-started/example-datasets/ontime.md @@ -390,9 +390,9 @@ You can also play with the data in Playground, [example](https://play.clickhouse This performance test was created by Vadim Tkachenko. 
See: -- https://www.percona.com/blog/2009/10/02/analyzing-air-traffic-performance-with-infobright-and-monetdb/ -- https://www.percona.com/blog/2009/10/26/air-traffic-queries-in-luciddb/ -- https://www.percona.com/blog/2009/11/02/air-traffic-queries-in-infinidb-early-alpha/ -- https://www.percona.com/blog/2014/04/21/using-apache-hadoop-and-impala-together-with-mysql-for-data-analysis/ -- https://www.percona.com/blog/2016/01/07/apache-spark-with-air-ontime-performance-data/ -- http://nickmakos.blogspot.ru/2012/08/analyzing-air-traffic-performance-with.html +- https://www.percona.com/blog/2009/10/02/analyzing-air-traffic-performance-with-infobright-and-monetdb/ +- https://www.percona.com/blog/2009/10/26/air-traffic-queries-in-luciddb/ +- https://www.percona.com/blog/2009/11/02/air-traffic-queries-in-infinidb-early-alpha/ +- https://www.percona.com/blog/2014/04/21/using-apache-hadoop-and-impala-together-with-mysql-for-data-analysis/ +- https://www.percona.com/blog/2016/01/07/apache-spark-with-air-ontime-performance-data/ +- http://nickmakos.blogspot.ru/2012/08/analyzing-air-traffic-performance-with.html diff --git a/docs/en/getting-started/example-datasets/recipes.md b/docs/en/getting-started/example-datasets/recipes.md index 729d3d17015..e0a66022d37 100644 --- a/docs/en/getting-started/example-datasets/recipes.md +++ b/docs/en/getting-started/example-datasets/recipes.md @@ -50,13 +50,13 @@ clickhouse-client --query " This is a showcase how to parse custom CSV, as it requires multiple tunes. Explanation: -- The dataset is in CSV format, but it requires some preprocessing on insertion; we use table function [input](../../sql-reference/table-functions/input.md) to perform preprocessing; -- The structure of CSV file is specified in the argument of the table function `input`; -- The field `num` (row number) is unneeded - we parse it from file and ignore; -- We use `FORMAT CSVWithNames` but the header in CSV will be ignored (by command line parameter `--input_format_with_names_use_header 0`), because the header does not contain the name for the first field; -- File is using only double quotes to enclose CSV strings; some strings are not enclosed in double quotes, and single quote must not be parsed as the string enclosing - that's why we also add the `--format_csv_allow_single_quote 0` parameter; -- Some strings from CSV cannot parse, because they contain `\M/` sequence at the beginning of the value; the only value starting with backslash in CSV can be `\N` that is parsed as SQL NULL. We add `--input_format_allow_errors_num 10` parameter and up to ten malformed records can be skipped; -- There are arrays for ingredients, directions and NER fields; these arrays are represented in unusual form: they are serialized into string as JSON and then placed in CSV - we parse them as String and then use [JSONExtract](../../sql-reference/functions/json-functions.md) function to transform it to Array. 
+- The dataset is in CSV format, but it requires some preprocessing on insertion; we use the table function [input](../../sql-reference/table-functions/input.md) to perform the preprocessing;
+- The structure of the CSV file is specified in the argument of the table function `input`;
+- The field `num` (row number) is unneeded - we parse it from the file and ignore it;
+- We use `FORMAT CSVWithNames`, but the header in CSV will be ignored (by the command line parameter `--input_format_with_names_use_header 0`), because the header does not contain the name for the first field;
+- The file uses only double quotes to enclose CSV strings; some strings are not enclosed in double quotes, and a single quote must not be parsed as a string-enclosing character - that's why we also add the `--format_csv_allow_single_quote 0` parameter;
+- Some strings from the CSV cannot be parsed, because they contain the `\M/` sequence at the beginning of the value; the only value that can start with a backslash in CSV is `\N`, which is parsed as SQL NULL. We add the `--input_format_allow_errors_num 10` parameter, so up to ten malformed records can be skipped;
+- There are arrays for the ingredients, directions and NER fields; these arrays are represented in an unusual form: they are serialized into a string as JSON and then placed in the CSV - we parse them as `String` and then use the [JSONExtract](../../sql-reference/functions/json-functions.md) function to transform them to `Array`.

## Validate the Inserted Data

diff --git a/docs/en/getting-started/example-datasets/youtube-dislikes.md b/docs/en/getting-started/example-datasets/youtube-dislikes.md
index ea12042c635..5f4ef696b8b 100644
--- a/docs/en/getting-started/example-datasets/youtube-dislikes.md
+++ b/docs/en/getting-started/example-datasets/youtube-dislikes.md
@@ -122,17 +122,10 @@ SELECT
    super_titles,
    ifNull(uploader_badges, '') AS uploader_badges,
    ifNull(video_badges, '') AS video_badges
-FROM s3Cluster(
-    'default',
+FROM s3(
    'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst',
    'JSONLines'
)
-SETTINGS
-    max_download_threads = 24,
-    max_insert_threads = 64,
-    max_insert_block_size = 100000000,
-    min_insert_block_size_rows = 100000000,
-    min_insert_block_size_bytes = 500000000;
```

Some comments about our `INSERT` command:

@@ -140,7 +133,6 @@ Some comments about our `INSERT` command:

- The `parseDateTimeBestEffortUSOrZero` function is handy when the incoming date fields may not be in the proper format. If `fetch_date` does not get parsed properly, it will be set to `0`
- The `upload_date` column contains valid dates, but it also contains strings like "4 hours ago" - which is certainly not a valid date. We decided to store the original value in `upload_date_str` and attempt to parse it with `toDate(parseDateTimeBestEffortUSOrZero(upload_date::String))`. If the parsing fails we just get `0`
- We used `ifNull` to avoid getting `NULL` values in our table. If an incoming value is `NULL`, the `ifNull` function is setting the value to an empty string
-- It takes a long time to download the data, so we added a `SETTINGS` clause to spread out the work over more threads while making sure the block sizes stayed fairly large

4. Open a new tab in the SQL Console of ClickHouse Cloud (or a new `clickhouse-client` window) and watch the count increase. It will take a while to insert 4.56B rows, depending on your server resources. (Without any tweaking of settings, it takes about 4.5 hours.)
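+The parsing behavior described in the comments above is easy to see in isolation. A small illustration (the input strings are made up):
+
+```sql
+SELECT
+    parseDateTimeBestEffortUSOrZero('8/31/2015') AS valid_date,     -- parses to a DateTime
+    parseDateTimeBestEffortUSOrZero('4 hours ago') AS invalid_date, -- falls back to the zero DateTime
+    ifNull(CAST(NULL, 'Nullable(String)'), '') AS no_null;          -- NULL becomes an empty string
+```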
diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md
index 2c0ac70a321..d5c476f08b4 100644
--- a/docs/en/getting-started/install.md
+++ b/docs/en/getting-started/install.md
@@ -161,11 +161,11 @@ sudo systemctl status clickhouse-keeper

#### Packages {#packages}

-- `clickhouse-common-static` — Installs ClickHouse compiled binary files.
-- `clickhouse-server` — Creates a symbolic link for `clickhouse-server` and installs the default server configuration.
-- `clickhouse-client` — Creates a symbolic link for `clickhouse-client` and other client-related tools. and installs client configuration files.
-- `clickhouse-common-static-dbg` — Installs ClickHouse compiled binary files with debug info.
-- `clickhouse-keeper` - Used to install ClickHouse Keeper on dedicated ClickHouse Keeper nodes. If you are running ClickHouse Keeper on the same server as ClickHouse server, then you do not need to install this package. Installs ClickHouse Keeper and the default ClickHouse Keeper configuration files.
+- `clickhouse-common-static` — Installs ClickHouse compiled binary files.
+- `clickhouse-server` — Creates a symbolic link for `clickhouse-server` and installs the default server configuration.
+- `clickhouse-client` — Creates a symbolic link for `clickhouse-client` and other client-related tools, and installs client configuration files.
+- `clickhouse-common-static-dbg` — Installs ClickHouse compiled binary files with debug info.
+- `clickhouse-keeper` - Used to install ClickHouse Keeper on dedicated ClickHouse Keeper nodes. If you are running ClickHouse Keeper on the same server as ClickHouse server, then you do not need to install this package. Installs ClickHouse Keeper and the default ClickHouse Keeper configuration files.

:::info
If you need to install a specific version of ClickHouse you have to install all packages with the same version:

@@ -429,8 +429,8 @@ We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The C

The required volume of RAM generally depends on:

-- The complexity of queries.
-- The amount of data that is processed in queries.
+- The complexity of queries.
+- The amount of data that is processed in queries.

To calculate the required volume of RAM, you may estimate the size of temporary data for [GROUP BY](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](/docs/en/sql-reference/statements/select/distinct.md#select-distinct), [JOIN](/docs/en/sql-reference/statements/select/join.md#select-join) and other operations you use.

@@ -442,11 +442,11 @@ The ClickHouse binary requires at least 2.5 GB of disk space for installation.

The volume of storage required for your data may be calculated separately based on

-- an estimation of the data volume.
+- An estimation of the data volume.

    You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store.

-- The data compression coefficient.
+- The data compression coefficient.

    To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times.
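+Once a sample is loaded, the compression coefficient can be checked directly. A sketch using the `system.parts` table (replace `your_table` with your table name):
+
+```sql
+SELECT
+    table,
+    formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed,
+    formatReadableSize(sum(data_compressed_bytes)) AS compressed,
+    round(sum(data_uncompressed_bytes) / sum(data_compressed_bytes), 2) AS compression_ratio
+FROM system.parts
+WHERE active AND table = 'your_table'
+GROUP BY table;
+```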
diff --git a/docs/en/getting-started/playground.md b/docs/en/getting-started/playground.md
index dbb8d46a2fc..6a6d4092177 100644
--- a/docs/en/getting-started/playground.md
+++ b/docs/en/getting-started/playground.md
@@ -26,8 +26,8 @@ You can make queries to Playground using any HTTP client, for example [curl](htt

The queries are executed as a read-only user. It implies some limitations:

-- DDL queries are not allowed
-- INSERT queries are not allowed
+- DDL queries are not allowed
+- INSERT queries are not allowed

The service also has quotas on its usage.

diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md
index e3b40d83efe..9bf4a465962 100644
--- a/docs/en/interfaces/cli.md
+++ b/docs/en/interfaces/cli.md
@@ -119,7 +119,7 @@ When processing a query, the client shows:

1. Progress, which is updated no more than 10 times per second (by default). For quick queries, the progress might not have time to be displayed.
2. The formatted query after parsing, for debugging.
3. The result in the specified format.
-4. The number of lines in the result, the time passed, and the average speed of query processing.
+4. The number of lines in the result, the time passed, and the average speed of query processing.

All data amounts refer to uncompressed data.

You can cancel a long query by pressing Ctrl+C. However, you will still need to wait a little for the server to abort the request. It is not possible to cancel a query at certain stages. If you do not wait and press Ctrl+C a second time, the client will exit.

@@ -148,8 +148,8 @@

Format a query as usual, then place the values that you want to pass from the app parameters to the query in braces in the following format:

```
{<name>:<data type>}
```

-- `name` — Placeholder identifier. In the console client it should be used in app parameters as `--param_<name> = value`.
-- `data type` — [Data type](../sql-reference/data-types/index.md) of the app parameter value. For example, a data structure like `(integer, ('string', integer))` can have the `Tuple(UInt8, Tuple(String, UInt8))` data type (you can also use another [integer](../sql-reference/data-types/int-uint.md) types). It's also possible to pass table, database, column names as a parameter, in that case you would need to use `Identifier` as a data type.
+- `name` — Placeholder identifier. In the console client it should be used in app parameters as `--param_<name> = value`.
+- `data type` — [Data type](../sql-reference/data-types/index.md) of the app parameter value. For example, a data structure like `(integer, ('string', integer))` can have the `Tuple(UInt8, Tuple(String, UInt8))` data type (you can also use other [integer](../sql-reference/data-types/int-uint.md) types). It's also possible to pass table, database, and column names as a parameter, in which case you would need to use `Identifier` as the data type.

#### Example {#example}

@@ -162,37 +162,37 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe

You can pass parameters to `clickhouse-client` (all parameters have a default value) using:

-- From the Command Line
+- From the Command Line

    Command-line options override the default values and settings in configuration files.

-- Configuration files.
+- Configuration files.

    Settings in the configuration files override the default values.

### Command Line Options {#command-line-options}

-- `--host, -h` – The server name, ‘localhost’ by default. You can use either the name or the IPv4 or IPv6 address.
-- `--port` – The port to connect to. Default value: 9000. Note that the HTTP interface and the native interface use different ports.
-- `--user, -u` – The username. Default value: default.
-- `--password` – The password. Default value: empty string.
-- `--ask-password` - Prompt the user to enter a password.
-- `--query, -q` – The query to process when using non-interactive mode. You must specify either `query` or `queries-file` option.
-- `--queries-file` – file path with queries to execute. You must specify either `query` or `queries-file` option.
-- `--database, -d` – Select the current default database. Default value: the current database from the server settings (‘default’ by default).
-- `--multiline, -m` – If specified, allow multiline queries (do not send the query on Enter).
-- `--multiquery, -n` – If specified, allow processing multiple queries separated by semicolons.
-- `--format, -f` – Use the specified default format to output the result.
-- `--vertical, -E` – If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `--format=Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables.
-- `--time, -t` – If specified, print the query execution time to ‘stderr’ in non-interactive mode.
-- `--stacktrace` – If specified, also print the stack trace if an exception occurs.
-- `--config-file` – The name of the configuration file.
-- `--secure` – If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl).
-- `--history_file` — Path to a file containing command history.
-- `--param_<name>` — Value for a [query with parameters](#cli-queries-with-parameters).
-- `--hardware-utilization` — Print hardware utilization information in progress bar.
-- `--print-profile-events` – Print `ProfileEvents` packets.
-- `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet).
+- `--host, -h` – The server name, ‘localhost’ by default. You can use either the name or the IPv4 or IPv6 address.
+- `--port` – The port to connect to. Default value: 9000. Note that the HTTP interface and the native interface use different ports.
+- `--user, -u` – The username. Default value: default.
+- `--password` – The password. Default value: empty string.
+- `--ask-password` - Prompt the user to enter a password.
+- `--query, -q` – The query to process when using non-interactive mode. You must specify either `query` or `queries-file` option.
+- `--queries-file` – file path with queries to execute. You must specify either `query` or `queries-file` option.
+- `--database, -d` – Select the current default database. Default value: the current database from the server settings (‘default’ by default).
+- `--multiline, -m` – If specified, allow multiline queries (do not send the query on Enter).
+- `--multiquery, -n` – If specified, allow processing multiple queries separated by semicolons.
+- `--format, -f` – Use the specified default format to output the result.
+- `--vertical, -E` – If specified, use the [Vertical format](../interfaces/formats.md#vertical) by default to output the result. This is the same as `--format=Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables.
+- `--time, -t` – If specified, print the query execution time to ‘stderr’ in non-interactive mode.
+- `--stacktrace` – If specified, also print the stack trace if an exception occurs.
+- `--config-file` – The name of the configuration file.
+- `--secure` – If specified, will connect to server over secure connection (TLS). You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl).
+- `--history_file` — Path to a file containing command history.
+- `--param_<name>` — Value for a [query with parameters](#cli-queries-with-parameters).
+- `--hardware-utilization` — Print hardware utilization information in progress bar.
+- `--print-profile-events` – Print `ProfileEvents` packets.
+- `--profile-events-delay-ms` – Delay between printing `ProfileEvents` packets (-1 - print only totals, 0 - print every single packet).

Since version 20.5, `clickhouse-client` has automatic syntax highlighting (always enabled).

@@ -200,10 +200,10 @@ Since version 20.5, `clickhouse-client` has automatic syntax highlighting (alway

`clickhouse-client` uses the first existing file of the following:

-- Defined in the `--config-file` parameter.
-- `./clickhouse-client.xml`
-- `~/.clickhouse-client/config.xml`
-- `/etc/clickhouse-client/config.xml`
+- Defined in the `--config-file` parameter.
+- `./clickhouse-client.xml`
+- `~/.clickhouse-client/config.xml`
+- `/etc/clickhouse-client/config.xml`

Example of a config file:

diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index 03f0201e3e0..2ab9e8caec4 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -38,6 +38,7 @@ The supported formats are:

| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ |
| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ |
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
+| [PrettyJSONEachRow](#prettyjsoneachrow) | ✗ | ✔ |
| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ |
| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ |
@@ -68,6 +69,7 @@ The supported formats are:

| [Avro](#data-format-avro) | ✔ | ✔ |
| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
+| [ParquetMetadata](#data-format-parquet-metadata) | ✔ | ✗ |
| [Arrow](#data-format-arrow) | ✔ | ✔ |
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✔ |
@@ -78,7 +80,7 @@ The supported formats are:

| [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ |
| [CapnProto](#capnproto) | ✔ | ✔ |
-| [LineAsString](#lineasstring) | ✔ | ✗ |
+| [LineAsString](#lineasstring) | ✔ | ✔ |
| [Regexp](#data-format-regexp) | ✔ | ✗ |
| [RawBLOB](#rawblob) | ✔ | ✔ |
| [MsgPack](#msgpack) | ✔ | ✔ |
@@ -255,11 +257,11 @@ where `delimiter_i` is a delimiter between values (`$` symbol can be escaped as

`column_i` is a name or index of a column whose values are to be selected or inserted (if empty, then column will be skipped),
`serializeAs_i` is an escaping rule for the column values.
The following escaping rules are supported:

-- `CSV`, `JSON`, `XML` (similar to the formats of the same names)
-- `Escaped` (similar to `TSV`)
-- `Quoted` (similar to `Values`)
-- `Raw` (without escaping, similar to `TSVRaw`)
-- `None` (no escaping rule, see further)
+- `CSV`, `JSON`, `XML` (similar to the formats of the same names)
+- `Escaped` (similar to `TSV`)
+- `Quoted` (similar to `Values`)
+- `Raw` (without escaping, similar to `TSVRaw`)
+- `None` (no escaping rule, see further)

If an escaping rule is omitted, then `None` will be used. `XML` is suitable only for output.

@@ -275,15 +277,15 @@ The `format_template_rows_between_delimiter` setting specifies the delimiter bet

Setting `format_template_resultset` specifies the path to the file, which contains a format string for resultset. Format string for resultset has the same syntax as a format string for row and allows to specify a prefix, a suffix and a way to print some additional information. It contains the following placeholders instead of column names:

-- `data` is the rows with data in `format_template_row` format, separated by `format_template_rows_between_delimiter`. This placeholder must be the first placeholder in the format string.
-- `totals` is the row with total values in `format_template_row` format (when using WITH TOTALS)
-- `min` is the row with minimum values in `format_template_row` format (when extremes are set to 1)
-- `max` is the row with maximum values in `format_template_row` format (when extremes are set to 1)
-- `rows` is the total number of output rows
-- `rows_before_limit` is the minimal number of rows there would have been without LIMIT. Output only if the query contains LIMIT. If the query contains GROUP BY, rows_before_limit_at_least is the exact number of rows there would have been without a LIMIT.
-- `time` is the request execution time in seconds
-- `rows_read` is the number of rows has been read
-- `bytes_read` is the number of bytes (uncompressed) has been read
+- `data` is the rows with data in `format_template_row` format, separated by `format_template_rows_between_delimiter`. This placeholder must be the first placeholder in the format string.
+- `totals` is the row with total values in `format_template_row` format (when using WITH TOTALS)
+- `min` is the row with minimum values in `format_template_row` format (when extremes are set to 1)
+- `max` is the row with maximum values in `format_template_row` format (when extremes are set to 1)
+- `rows` is the total number of output rows
+- `rows_before_limit` is the minimal number of rows there would have been without LIMIT. Output only if the query contains LIMIT. If the query contains GROUP BY, rows_before_limit_at_least is the exact number of rows there would have been without a LIMIT.
+- `time` is the request execution time in seconds
+- `rows_read` is the number of rows that have been read
+- `bytes_read` is the number of bytes (uncompressed) that have been read

The placeholders `data`, `totals`, `min` and `max` must not have escaping rule specified (or `None` must be specified explicitly). The remaining placeholders may have any escaping rule specified.
If the `format_template_resultset` setting is an empty string, `${data}` is used as the default value.
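+As a sketch of how these settings fit together (the file paths and their contents are hypothetical):
+
+```sql
+-- /tmp/row.format might contain:    ${number:CSV};${str:Quoted}
+-- /tmp/result.format might contain: ${data}
+--                                   Total rows: ${rows}
+SELECT number, toString(number) AS str
+FROM system.numbers
+LIMIT 3
+SETTINGS
+    format_template_row = '/tmp/row.format',
+    format_template_rows_between_delimiter = '\n',
+    format_template_resultset = '/tmp/result.format'
+FORMAT Template;
+```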
@@ -619,8 +621,8 @@ ClickHouse supports [NULL](/docs/en/sql-reference/syntax.md), which is displayed

**See Also**

-- [JSONEachRow](#jsoneachrow) format
-- [output_format_json_array_of_rows](/docs/en/operations/settings/settings-formats.md/#output_format_json_array_of_rows) setting
+- [JSONEachRow](#jsoneachrow) format
+- [output_format_json_array_of_rows](/docs/en/operations/settings/settings-formats.md/#output_format_json_array_of_rows) setting

For JSON input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings-formats.md/#input_format_json_validate_types_from_metadata) is set to 1, the types from metadata in input data will be compared with the types of the corresponding columns from the table.

@@ -917,6 +919,40 @@ Example:

While importing data columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1.

+## PrettyJSONEachRow {#prettyjsoneachrow}
+
+Differs from JSONEachRow only in that the JSON is pretty-formatted with newline delimiters and 4-space indents. Suitable only for output.
+
+Example:
+
+```json
+{
+    "num": "42",
+    "str": "hello",
+    "arr": [
+        "0",
+        "1"
+    ],
+    "tuple": {
+        "num": 42,
+        "str": "world"
+    }
+}
+{
+    "num": "43",
+    "str": "hello",
+    "arr": [
+        "0",
+        "1",
+        "2"
+    ],
+    "tuple": {
+        "num": 43,
+        "str": "world"
+    }
+}
+```
+
## JSONStringsEachRow {#jsonstringseachrow}

Differs from JSONEachRow only in that data fields are output in strings, not in typed JSON values.

@@ -1096,8 +1132,8 @@ INSERT INTO UserActivity FORMAT JSONEachRow {"PageViews":5, "UserID":"4324182021

ClickHouse allows:

-- Any order of key-value pairs in the object.
-- Omitting some values.
+- Any order of key-value pairs in the object.
+- Omitting some values.

ClickHouse ignores spaces between elements and commas after the objects. You can pass all the objects in one line. You do not have to separate them with line breaks.

@@ -1117,8 +1153,8 @@ CREATE TABLE IF NOT EXISTS example_table
) ENGINE = Memory;
```

-- If `input_format_defaults_for_omitted_fields = 0`, then the default value for `x` and `a` equals `0` (as the default value for the `UInt32` data type).
-- If `input_format_defaults_for_omitted_fields = 1`, then the default value for `x` equals `0`, but the default value of `a` equals `x * 2`.
+- If `input_format_defaults_for_omitted_fields = 0`, then the default value for `x` and `a` equals `0` (as the default value for the `UInt32` data type).
+- If `input_format_defaults_for_omitted_fields = 1`, then the default value for `x` equals `0`, but the default value of `a` equals `x * 2`.

:::note
When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources, compared to insertion with `input_format_defaults_for_omitted_fields = 0`.

@@ -1447,8 +1483,8 @@ For [NULL](/docs/en/sql-reference/syntax.md/#null-literal) support, an additiona

Similar to [RowBinary](#rowbinary), but with added header:

-- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N)
-- N `String`s specifying column names
+- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N)
+- N `String`s specifying column names

:::note
If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1,
@@ -1460,9 +1496,9 @@ Otherwise, the first row will be skipped.
Similar to [RowBinary](#rowbinary), but with added header: -- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N) -- N `String`s specifying column names -- N `String`s specifying column types +- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N) +- N `String`s specifying column names +- N `String`s specifying column types :::note If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, @@ -1872,11 +1908,18 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Avro" > file.avro Column names must: -- start with `[A-Za-z_]` -- subsequently contain only `[A-Za-z0-9_]` +- start with `[A-Za-z_]` +- subsequently contain only `[A-Za-z0-9_]` Output Avro file compression and sync interval can be configured with [output_format_avro_codec](/docs/en/operations/settings/settings-formats.md/#output_format_avro_codec) and [output_format_avro_sync_interval](/docs/en/operations/settings/settings-formats.md/#output_format_avro_sync_interval) respectively. +### Example Data {#example-data-avro} + +Using the ClickHouse [DESCRIBE](/docs/en/sql-reference/statements/describe-table) function, you can quickly view the inferred format of an Avro file like the following example. This example includes the URL of a publicly accessible Avro file in the ClickHouse S3 public bucket: + +``` DESCRIBE url('https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits.avro','Avro'); +``` + ## AvroConfluent {#data-format-avro-confluent} AvroConfluent supports decoding single-object Avro messages commonly used with [Kafka](https://kafka.apache.org/) and [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html). @@ -1936,30 +1979,31 @@ Setting `format_avro_schema_registry_url` needs to be configured in `users.xml` The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries. 
-| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) | -|----------------------------------------------------|-----------------------------------------------------------------|------------------------------| -| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` | -| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | -| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` | -| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` | -| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` | -| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` | -| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | -| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | -| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` | -| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` | -| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` | -| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` | -| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` | -| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` | -| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` | -| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` | -| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` | -| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | -| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | -| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` | -| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` | -| `FIXED_LENGTH_BYTE_ARRAY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` | +| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) | +|-----------------------------------------------|------------------------------------------------------------------------------------------------------------|-------------------------------| +| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` | +| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` | +| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `INT8` | +| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` | +| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `INT16` | +| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` | +| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` | +| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` | +| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` | +| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` | +| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` | +| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` | +| `TIME (ms)` | 
+| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
+| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
+| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
+| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` |
+| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
+| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
+| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
+| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
+| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
+| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` |
+| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_LENGTH_BYTE_ARRAY` |
 Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
@@ -1995,6 +2039,138 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
 - [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`.
 - [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`.
+## ParquetMetadata {#data-format-parquet-metadata}
+
+A special format for reading Parquet file [metadata](https://parquet.apache.org/docs/file-format/metadata/). It always outputs one row with the following structure:
+- num_columns - the number of columns
+- num_rows - the total number of rows
+- num_row_groups - the total number of row groups
+- format_version - Parquet format version, always `1.0` or `2.6`
+- metadata_size - the size of the file metadata in bytes
+- total_uncompressed_size - the total uncompressed size of the data in bytes, calculated as the sum of total_byte_size over all row groups
+- total_compressed_size - the total compressed size of the data in bytes, calculated as the sum of total_compressed_size over all row groups
+- columns - the list of column metadata, each entry with the following structure:
+  - name - column name
+  - path - column path (differs from name for nested columns)
+  - max_definition_level - maximum definition level
+  - max_repetition_level - maximum repetition level
+  - physical_type - column physical type
+  - logical_type - column logical type
+  - compression - compression used for this column
+  - total_uncompressed_size - the total uncompressed size of the column in bytes, calculated as the sum of the column's total_uncompressed_size over all row groups
+  - total_compressed_size - the total compressed size of the column in bytes, calculated as the sum of the column's total_compressed_size over all row groups
+  - space_saved - percent of space saved by compression, calculated as (1 - total_compressed_size/total_uncompressed_size).
+  - encodings - the list of encodings used for this column
+- row_groups - the list of row group metadata, each entry with the following structure:
+  - num_columns - the number of columns in the row group
+  - num_rows - the number of rows in the row group
+  - total_uncompressed_size - the total uncompressed size of the row group in bytes
+  - total_compressed_size - the total compressed size of the row group in bytes
+  - columns - the list of column chunk metadata, each entry with the following structure:
+    - name - column name
+    - path - column path
+    - total_compressed_size - the total compressed size of the column chunk in bytes
+    - total_uncompressed_size - the total uncompressed size of the column chunk in bytes
+    - have_statistics - boolean flag that indicates whether the column chunk metadata contains column statistics
+    - statistics - column chunk statistics (all fields are NULL if have_statistics = false) with the following structure:
+      - num_values - the number of non-null values in the column chunk
+      - null_count - the number of NULL values in the column chunk
+      - distinct_count - the number of distinct values in the column chunk
+      - min - the minimum value of the column chunk
+      - max - the maximum value of the column chunk
+
+Example:
+
+```sql
+SELECT * FROM file(data.parquet, ParquetMetadata) format PrettyJSONEachRow
+```
+
+```json
+{
+    "num_columns": "2",
+    "num_rows": "100000",
+    "num_row_groups": "2",
+    "format_version": "2.6",
+    "metadata_size": "577",
+    "total_uncompressed_size": "282436",
+    "total_compressed_size": "26633",
+    "columns": [
+        {
+            "name": "number",
+            "path": "number",
+            "max_definition_level": "0",
+            "max_repetition_level": "0",
+            "physical_type": "INT32",
+            "logical_type": "Int(bitWidth=16, isSigned=false)",
+            "compression": "LZ4",
+            "total_uncompressed_size": "133321",
+            "total_compressed_size": "13293",
+            "space_saved": "90.03%",
+            "encodings": [
+                "RLE_DICTIONARY",
+                "PLAIN",
+                "RLE"
+            ]
+        },
+        {
+            "name": "concat('Hello', toString(modulo(number, 1000)))",
+            "path": "concat('Hello', toString(modulo(number, 1000)))",
+            "max_definition_level": "0",
+            "max_repetition_level": "0",
+            "physical_type": "BYTE_ARRAY",
+            "logical_type": "None",
+            "compression": "LZ4",
+            "total_uncompressed_size": "149115",
+            "total_compressed_size": "13340",
+            "space_saved": "91.05%",
+            "encodings": [
+                "RLE_DICTIONARY",
+                "PLAIN",
+                "RLE"
+            ]
+        }
+    ],
+    "row_groups": [
+        {
+            "num_columns": "2",
+            "num_rows": "65409",
+            "total_uncompressed_size": "179809",
+            "total_compressed_size": "14163",
+            "columns": [
+                {
+                    "name": "number",
+                    "path": "number",
+                    "total_compressed_size": "7070",
+                    "total_uncompressed_size": "85956",
+                    "have_statistics": true,
+                    "statistics": {
+                        "num_values": "65409",
+                        "null_count": "0",
+                        "distinct_count": null,
+                        "min": "0",
+                        "max": "999"
+                    }
+                },
+                {
+                    "name": "concat('Hello', toString(modulo(number, 1000)))",
+                    "path": "concat('Hello', toString(modulo(number, 1000)))",
+                    "total_compressed_size": "7093",
+                    "total_uncompressed_size": "93853",
+                    "have_statistics": true,
+                    "statistics": {
+                        "num_values": "65409",
+                        "null_count": "0",
+                        "distinct_count": null,
+                        "min": "Hello0",
+                        "max": "Hello999"
+                    }
+                }
+            ]
+        },
+        ...
+    ]
+}
+```
+
 ## Arrow {#data-format-arrow}
 
 [Apache Arrow](https://arrow.apache.org/) comes with two built-in columnar storage formats. ClickHouse supports read and write operations for these formats.
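+As a quick sketch (the table and file names are placeholders, not objects defined in these docs), a round trip through the `Arrow` format from the shell could look like this:
+
+```bash
+# Export a table to an Arrow file (names are illustrative).
+clickhouse-client --query="SELECT * FROM some_table FORMAT Arrow" > data.arrow
+
+# Load the file back into a table with a compatible schema.
+cat data.arrow | clickhouse-client --query="INSERT INTO some_table_copy FORMAT Arrow"
+```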
@@ -2005,31 +2181,32 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
 The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
-| Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) |
-|----------------------------|----------------------|----------------------------|
-| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
-| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
-| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
-| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
-| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
-| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
-| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
-| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
-| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
-| `FLOAT`, `HALF_FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
-| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
-| `DATE32` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `UINT16` |
-| `DATE64` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
-| `TIMESTAMP`, `TIME32`, `TIME64` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `UINT32` |
-| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
-| `STRING`, `BINARY`, `FIXED_SIZE_BINARY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_SIZE_BINARY` |
-| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
-| `DECIMAL256` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL256` |
-| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
-| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
-| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
-| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
-| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` |
+| Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) |
+|----------------------------|----------------------|----------------------------|
+| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
+| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
+| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `INT8` |
+| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
+| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `INT16` |
+| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
+| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
+| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
+| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
+| `FLOAT`, `HALF_FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
+| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
+| `DATE32` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `UINT16` |
+| `DATE64` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
+| `TIMESTAMP`, `TIME32`, `TIME64` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `UINT32` |
+| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
+| `STRING`, `BINARY`, `FIXED_SIZE_BINARY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_SIZE_BINARY` |
+| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
+| `DECIMAL256` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL256` |
+| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
+| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
+| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
+| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
+| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` |
+| `FIXED_SIZE_BINARY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_SIZE_BINARY` |
 Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
@@ -2064,7 +2241,7 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
 - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.
 - [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`.
 - [output_format_arrow_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_fixed_string_as_fixed_byte_array) - use Arrow FIXED_SIZE_BINARY type instead of Binary/String for FixedString columns. Default value - `true`.
-- [output_format_arrow_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_compression_method) - compression method used in output Arrow format. Default value - `none`.
+- [output_format_arrow_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_compression_method) - compression method used in output Arrow format. Default value - `lz4_frame`.
 ## ArrowStream {#data-format-arrow-stream}
@@ -2078,23 +2255,26 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
 The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
-| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
-|--------------------------|----------------------|--------------------------|
-| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
-| `Tinyint` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `Tinyint` |
-| `Smallint` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `Smallint` |
-| `Int` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
-| `Bigint` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `Bigint` |
-| `Float` | [Float32](/docs/en/sql-reference/data-types/float.md) | `Float` |
-| `Double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `Double` |
-| `Decimal` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `Decimal` |
-| `Date` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `Date` |
-| `Timestamp` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `Timestamp` |
-| `String`, `Char`, `Varchar`, `Binary` | [String](/docs/en/sql-reference/data-types/string.md) | `Binary` |
-| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
-| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
-| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
-| `-` | [IPv4](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
+| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
+|--------------------------|----------------------|--------------------------|
+| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
+| `Tinyint` | [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `Tinyint` |
+| `Smallint` | [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `Smallint` |
+| `Int` | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
+| `Bigint` | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `Bigint` |
+| `Float` | [Float32](/docs/en/sql-reference/data-types/float.md) | `Float` |
+| `Double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `Double` |
+| `Decimal` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `Decimal` |
+| `Date` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `Date` |
+| `Timestamp` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `Timestamp` |
+| `String`, `Char`, `Varchar`, `Binary` | [String](/docs/en/sql-reference/data-types/string.md) | `Binary` |
+| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
+| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
+| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
+| `Int` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `Int` |
+| `Binary` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `Binary` |
+| `Binary` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `Binary` |
+| `Binary` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `Binary` |
 Other types are not supported.
@@ -2159,17 +2339,17 @@ Each line of imported data is parsed according to the regular expression.
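+As a minimal sketch (the table name, pattern, and input line are illustrative), an import driven by the settings described below might look like this:
+
+```bash
+# Each format_regexp subpattern is matched to a table column in order.
+echo 'id: 1 name: str1' | clickhouse-client \
+    --query="INSERT INTO imported_table FORMAT Regexp" \
+    --format_regexp='id: (.+?) name: (.+?)' \
+    --format_regexp_escaping_rule='Escaped' \
+    --format_regexp_skip_unmatched=0
+```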
When working with the `Regexp` format, you can use the following settings: -- `format_regexp` — [String](/docs/en/sql-reference/data-types/string.md). Contains regular expression in the [re2](https://github.com/google/re2/wiki/Syntax) format. +- `format_regexp` — [String](/docs/en/sql-reference/data-types/string.md). Contains regular expression in the [re2](https://github.com/google/re2/wiki/Syntax) format. -- `format_regexp_escaping_rule` — [String](/docs/en/sql-reference/data-types/string.md). The following escaping rules are supported: +- `format_regexp_escaping_rule` — [String](/docs/en/sql-reference/data-types/string.md). The following escaping rules are supported: - - CSV (similarly to [CSV](#csv)) - - JSON (similarly to [JSONEachRow](#jsoneachrow)) - - Escaped (similarly to [TSV](#tabseparated)) - - Quoted (similarly to [Values](#data-format-values)) - - Raw (extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](#tabseparatedraw)) + - CSV (similarly to [CSV](#csv)) + - JSON (similarly to [JSONEachRow](#jsoneachrow)) + - Escaped (similarly to [TSV](#tabseparated)) + - Quoted (similarly to [Values](#data-format-values)) + - Raw (extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](#tabseparatedraw)) -- `format_regexp_skip_unmatched` — [UInt8](/docs/en/sql-reference/data-types/int-uint.md). Defines the need to throw an exception in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`. +- `format_regexp_skip_unmatched` — [UInt8](/docs/en/sql-reference/data-types/int-uint.md). Defines the need to throw an exception in case the `format_regexp` expression does not match the imported data. Can be set to `0` or `1`. **Usage** diff --git a/docs/en/interfaces/grpc.md b/docs/en/interfaces/grpc.md index 807663be646..3087ad20eac 100644 --- a/docs/en/interfaces/grpc.md +++ b/docs/en/interfaces/grpc.md @@ -10,14 +10,14 @@ sidebar_label: gRPC Interface ClickHouse supports [gRPC](https://grpc.io/) interface. It is an open source remote procedure call system that uses HTTP/2 and [Protocol Buffers](https://en.wikipedia.org/wiki/Protocol_Buffers). The implementation of gRPC in ClickHouse supports: -- SSL; -- authentication; -- sessions; -- compression; -- parallel queries through the same channel; -- cancellation of queries; -- getting progress and logs; -- external tables. +- SSL; +- authentication; +- sessions; +- compression; +- parallel queries through the same channel; +- cancellation of queries; +- getting progress and logs; +- external tables. The specification of the interface is described in [clickhouse_grpc.proto](https://github.com/ClickHouse/ClickHouse/blob/master/src/Server/grpc_protos/clickhouse_grpc.proto). @@ -64,15 +64,15 @@ Or you can use a built-in Python client. It is placed in [utils/grpc-client/clic The client supports the following arguments: -- `--help` – Shows a help message and exits. -- `--host HOST, -h HOST` – A server name. Default value: `localhost`. You can use IPv4 or IPv6 addresses also. -- `--port PORT` – A port to connect to. This port should be enabled in the ClickHouse server configuration (see `grpc_port`). Default value: `9100`. -- `--user USER_NAME, -u USER_NAME` – A user name. Default value: `default`. -- `--password PASSWORD` – A password. Default value: empty string. -- `--query QUERY, -q QUERY` – A query to process when using non-interactive mode. -- `--database DATABASE, -d DATABASE` – A default database. 
If not specified, the current database set in the server settings is used (`default` by default). -- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – A result output [format](formats.md). Default value for interactive mode: `PrettyCompact`. -- `--debug` – Enables showing debug information. +- `--help` – Shows a help message and exits. +- `--host HOST, -h HOST` – A server name. Default value: `localhost`. You can use IPv4 or IPv6 addresses also. +- `--port PORT` – A port to connect to. This port should be enabled in the ClickHouse server configuration (see `grpc_port`). Default value: `9100`. +- `--user USER_NAME, -u USER_NAME` – A user name. Default value: `default`. +- `--password PASSWORD` – A password. Default value: empty string. +- `--query QUERY, -q QUERY` – A query to process when using non-interactive mode. +- `--database DATABASE, -d DATABASE` – A default database. If not specified, the current database set in the server settings is used (`default` by default). +- `--format OUTPUT_FORMAT, -f OUTPUT_FORMAT` – A result output [format](formats.md). Default value for interactive mode: `PrettyCompact`. +- `--debug` – Enables showing debug information. To run the client in an interactive mode call it without `--query` argument. diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 4bc108cac7c..3a7f6d4d854 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -293,11 +293,11 @@ X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_ro Possible header fields: -- `read_rows` — Number of rows read. -- `read_bytes` — Volume of data read in bytes. -- `total_rows_to_read` — Total number of rows to be read. -- `written_rows` — Number of rows written. -- `written_bytes` — Volume of data written in bytes. +- `read_rows` — Number of rows read. +- `read_bytes` — Volume of data read in bytes. +- `total_rows_to_read` — Total number of rows to be read. +- `written_rows` — Number of rows written. +- `written_bytes` — Volume of data written in bytes. Running requests do not stop automatically if the HTTP connection is lost. Parsing and data formatting are performed on the server-side, and using the network might be ineffective. The optional ‘query_id’ parameter can be passed as the query ID (any string). For more information, see the section “Settings, replace_running_query”. @@ -333,6 +333,35 @@ You can create a query with parameters and pass values for them from the corresp $ curl -sS "
<address>?param_id=2&param_phrase=test" -d "SELECT * FROM table WHERE int_column = {id:UInt8} and string_column = {phrase:String}"
 ```
+### Tabs in URL Parameters
+
+Query parameters are parsed from the "escaped" format. This has some benefits, such as the possibility to unambiguously parse nulls as `\N`. This means the tab character should be encoded as `\t` (or `\` and a tab). For example, the following contains an actual tab between `abc` and `123`, and the input string is split into two values:
+
+```bash
+curl -sS "http://localhost:8123" -d "SELECT splitByChar('\t', 'abc 123')"
+```
+
+```response
+['abc','123']
+```
+
+However, if you try to encode an actual tab using `%09` in a URL parameter, it won't get parsed properly:
+
+```bash
+curl -sS "http://localhost:8123?param_arg1=abc%09123" -d "SELECT splitByChar('\t', {arg1:String})"
+Code: 457. DB::Exception: Value abc 123 cannot be parsed as String for query parameter 'arg1' because it isn't parsed completely: only 3 of 7 bytes was parsed: abc. (BAD_QUERY_PARAMETER) (version 23.4.1.869 (official build))
+```
+
+If you are using URL parameters, you will need to encode the `\t` as `%5C%09`. For example:
+
+```bash
+curl -sS "http://localhost:8123?param_arg1=abc%5C%09123" -d "SELECT splitByChar('\t', {arg1:String})"
+```
+
+```response
+['abc','123']
+```
+
 ## Predefined HTTP Interface {#predefined_http_interface}
 ClickHouse supports specific queries through the HTTP interface. For example, you can write data to a table as follows:
@@ -345,7 +374,7 @@ ClickHouse also supports Predefined HTTP Interface which can help you more easil
 Example:
-- First of all, add this section to server configuration file:
+- First of all, add this section to server configuration file:
@@ -364,7 +393,7 @@ Example:
 ```
-- You can now request the URL directly for data in the Prometheus format:
+- You can now request the URL directly for data in the Prometheus format:
@@ -419,22 +448,22 @@ As you can see from the example if `http_handlers` is configured in the config.x
 Now `rule` can configure `method`, `headers`, `url`, `handler`:
 - `method` is responsible for matching the method part of the HTTP request. `method` fully conforms to the definition of [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) in the HTTP protocol. It is an optional configuration. If it is not defined in the configuration file, it does not match the method portion of the HTTP request.
-- `url` is responsible for matching the URL part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the URL portion of the HTTP request.
+- `url` is responsible for matching the URL part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the URL portion of the HTTP request.
-- `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
+- `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2’s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
-- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`. +- `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`. `type` currently supports three types: [predefined_query_handler](#predefined_query_handler), [dynamic_query_handler](#dynamic_query_handler), [static](#static). - - `query` — use with `predefined_query_handler` type, executes query when the handler is called. + - `query` — use with `predefined_query_handler` type, executes query when the handler is called. - - `query_param_name` — use with `dynamic_query_handler` type, extracts and executes the value corresponding to the `query_param_name` value in HTTP request parameters. + - `query_param_name` — use with `dynamic_query_handler` type, extracts and executes the value corresponding to the `query_param_name` value in HTTP request parameters. - - `status` — use with `static` type, response status code. + - `status` — use with `static` type, response status code. - - `content_type` — use with any type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type). + - `content_type` — use with any type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type). - - `response_content` — use with `static` type, response content sent to client, when using the prefix ‘file://’ or ‘config://’, find the content from the file or configuration sends to client. + - `response_content` — use with `static` type, response content sent to client, when using the prefix ‘file://’ or ‘config://’, find the content from the file or configuration sends to client. Next are the configuration methods for different `type`. diff --git a/docs/en/interfaces/jdbc.md b/docs/en/interfaces/jdbc.md index b2ff2829af9..2b68316cc3d 100644 --- a/docs/en/interfaces/jdbc.md +++ b/docs/en/interfaces/jdbc.md @@ -8,6 +8,6 @@ sidebar_label: JDBC Driver Use the [official JDBC driver](https://github.com/ClickHouse/clickhouse-jdbc) (and Java client) to access ClickHouse from your Java applications. -- Third-party drivers: - - [ClickHouse-Native-JDBC](https://github.com/housepower/ClickHouse-Native-JDBC) - - [clickhouse4j](https://github.com/blynkkk/clickhouse4j) +- Third-party drivers: + - [ClickHouse-Native-JDBC](https://github.com/housepower/ClickHouse-Native-JDBC) + - [clickhouse4j](https://github.com/blynkkk/clickhouse4j) diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md index c2d6038125b..fab3ba42758 100644 --- a/docs/en/interfaces/mysql.md +++ b/docs/en/interfaces/mysql.md @@ -57,9 +57,9 @@ If user password is specified using [SHA256](../operations/settings/settings-use Restrictions: -- prepared queries are not supported +- prepared queries are not supported -- some data types are sent as strings +- some data types are sent as strings To cancel a long query use `KILL QUERY connection_id` statement (it is replaced with `KILL QUERY WHERE query_id = connection_id` while proceeding). 
For example: diff --git a/docs/en/interfaces/overview.md b/docs/en/interfaces/overview.md index ee47e010f9e..8f16dcf5f83 100644 --- a/docs/en/interfaces/overview.md +++ b/docs/en/interfaces/overview.md @@ -10,19 +10,19 @@ description: ClickHouse provides three network interfaces ClickHouse provides three network interfaces (they can be optionally wrapped in TLS for additional security): -- [HTTP](http.md), which is documented and easy to use directly. -- [Native TCP](../interfaces/tcp.md), which has less overhead. -- [gRPC](grpc.md). +- [HTTP](http.md), which is documented and easy to use directly. +- [Native TCP](../interfaces/tcp.md), which has less overhead. +- [gRPC](grpc.md). In most cases it is recommended to use an appropriate tool or library instead of interacting with those directly. The following are officially supported by ClickHouse: -- [Command-line client](../interfaces/cli.md) -- [JDBC driver](../interfaces/jdbc.md) -- [ODBC driver](../interfaces/odbc.md) -- [C++ client library](../interfaces/cpp.md) +- [Command-line client](../interfaces/cli.md) +- [JDBC driver](../interfaces/jdbc.md) +- [ODBC driver](../interfaces/odbc.md) +- [C++ client library](../interfaces/cpp.md) There are also a wide range of third-party libraries for working with ClickHouse: -- [Client libraries](../interfaces/third-party/client-libraries.md) -- [Integrations](../interfaces/third-party/integrations.md) -- [Visual interfaces](../interfaces/third-party/gui.md) +- [Client libraries](../interfaces/third-party/client-libraries.md) +- [Integrations](../interfaces/third-party/integrations.md) +- [Visual interfaces](../interfaces/third-party/gui.md) diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index 1069a04391f..f7603994163 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -10,68 +10,68 @@ sidebar_label: Client Libraries ClickHouse Inc does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality. 
::: -- Python - - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) - - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) - - [clickhouse-client](https://github.com/yurial/clickhouse-client) - - [aiochclient](https://github.com/maximdanilchenko/aiochclient) - - [asynch](https://github.com/long2ice/asynch) -- PHP - - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) - - [bozerkins/clickhouse-client](https://packagist.org/packages/bozerkins/clickhouse-client) - - [simpod/clickhouse-client](https://packagist.org/packages/simpod/clickhouse-client) - - [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client) - - [SeasClick C++ client](https://github.com/SeasX/SeasClick) - - [one-ck](https://github.com/lizhichao/one-ck) - - [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel) - - [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php) - - [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php) -- Go - - [clickhouse](https://github.com/kshvakov/clickhouse/) - - [go-clickhouse](https://github.com/roistat/go-clickhouse) - - [chconn](https://github.com/vahid-sohrabloo/chconn) - - [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse) - - [golang-clickhouse](https://github.com/leprosus/golang-clickhouse) - - [uptrace/go-clickhouse](https://clickhouse.uptrace.dev/) -- Swift - - [ClickHouseNIO](https://github.com/patrick-zippenfenig/ClickHouseNIO) - - [ClickHouseVapor ORM](https://github.com/patrick-zippenfenig/ClickHouseVapor) -- NodeJs - - [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse) - - [node-clickhouse](https://github.com/apla/node-clickhouse) - - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse) - - [clickhouse-client](https://github.com/depyronick/clickhouse-client) - - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm) -- Perl - - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse) - - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse) - - [AnyEvent-ClickHouse](https://metacpan.org/release/AnyEvent-ClickHouse) -- Ruby - - [ClickHouse (Ruby)](https://github.com/shlima/click_house) - - [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord) -- Rust - - [clickhouse.rs](https://github.com/loyd/clickhouse.rs) - - [clickhouse-rs](https://github.com/suharev7/clickhouse-rs) - - [Klickhouse](https://github.com/Protryon/klickhouse) -- R - - [RClickHouse](https://github.com/IMSMWU/RClickHouse) -- Java - - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) - - [clickhouse-client](https://github.com/Ecwid/clickhouse-client) -- Scala - - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) -- Kotlin - - [AORM](https://github.com/TanVD/AORM) -- C# - - [Octonica.ClickHouseClient](https://github.com/Octonica/ClickHouseClient) - - [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net) - - [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client) - - [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net) -- Elixir - - [clickhousex](https://github.com/appodeal/clickhousex/) - - [pillar](https://github.com/sofakingworld/pillar) -- Nim - - [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse) -- Haskell - - 
[hdbc-clickhouse](https://github.com/zaneli/hdbc-clickhouse) +- Python + - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) + - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) + - [clickhouse-client](https://github.com/yurial/clickhouse-client) + - [aiochclient](https://github.com/maximdanilchenko/aiochclient) + - [asynch](https://github.com/long2ice/asynch) +- PHP + - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) + - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) + - [bozerkins/clickhouse-client](https://packagist.org/packages/bozerkins/clickhouse-client) + - [simpod/clickhouse-client](https://packagist.org/packages/simpod/clickhouse-client) + - [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client) + - [SeasClick C++ client](https://github.com/SeasX/SeasClick) + - [one-ck](https://github.com/lizhichao/one-ck) + - [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel) + - [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php) + - [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php) +- Go + - [clickhouse](https://github.com/kshvakov/clickhouse/) + - [go-clickhouse](https://github.com/roistat/go-clickhouse) + - [chconn](https://github.com/vahid-sohrabloo/chconn) + - [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse) + - [golang-clickhouse](https://github.com/leprosus/golang-clickhouse) + - [uptrace/go-clickhouse](https://clickhouse.uptrace.dev/) +- Swift + - [ClickHouseNIO](https://github.com/patrick-zippenfenig/ClickHouseNIO) + - [ClickHouseVapor ORM](https://github.com/patrick-zippenfenig/ClickHouseVapor) +- NodeJs + - [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse) + - [node-clickhouse](https://github.com/apla/node-clickhouse) + - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse) + - [clickhouse-client](https://github.com/depyronick/clickhouse-client) + - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm) +- Perl + - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse) + - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse) + - [AnyEvent-ClickHouse](https://metacpan.org/release/AnyEvent-ClickHouse) +- Ruby + - [ClickHouse (Ruby)](https://github.com/shlima/click_house) + - [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord) +- Rust + - [clickhouse.rs](https://github.com/loyd/clickhouse.rs) + - [clickhouse-rs](https://github.com/suharev7/clickhouse-rs) + - [Klickhouse](https://github.com/Protryon/klickhouse) +- R + - [RClickHouse](https://github.com/IMSMWU/RClickHouse) +- Java + - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) + - [clickhouse-client](https://github.com/Ecwid/clickhouse-client) +- Scala + - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) +- Kotlin + - [AORM](https://github.com/TanVD/AORM) +- C# + - [Octonica.ClickHouseClient](https://github.com/Octonica/ClickHouseClient) + - [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net) + - [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client) + - [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net) +- Elixir + - [clickhousex](https://github.com/appodeal/clickhousex/) + - [pillar](https://github.com/sofakingworld/pillar) +- Nim + - [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse) 
+- Haskell + - [hdbc-clickhouse](https://github.com/zaneli/hdbc-clickhouse) diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index cba6240788a..900764b8128 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -14,11 +14,11 @@ Web interface for ClickHouse in the [Tabix](https://github.com/tabixio/tabix) pr Features: -- Works with ClickHouse directly from the browser, without the need to install additional software. -- Query editor with syntax highlighting. -- Auto-completion of commands. -- Tools for graphical analysis of query execution. -- Colour scheme options. +- Works with ClickHouse directly from the browser, without the need to install additional software. +- Query editor with syntax highlighting. +- Auto-completion of commands. +- Tools for graphical analysis of query execution. +- Colour scheme options. [Tabix documentation](https://tabix.io/doc/). @@ -28,21 +28,21 @@ Features: Features: -- Query builder with syntax highlighting. View the response in a table or JSON view. -- Export query results as CSV or JSON. -- List of processes with descriptions. Write mode. Ability to stop (`KILL`) a process. -- Database graph. Shows all tables and their columns with additional information. -- A quick view of the column size. -- Server configuration. +- Query builder with syntax highlighting. View the response in a table or JSON view. +- Export query results as CSV or JSON. +- List of processes with descriptions. Write mode. Ability to stop (`KILL`) a process. +- Database graph. Shows all tables and their columns with additional information. +- A quick view of the column size. +- Server configuration. The following features are planned for development: -- Database management. -- User management. -- Real-time data analysis. -- Cluster monitoring. -- Cluster management. -- Monitoring replicated and Kafka tables. +- Database management. +- User management. +- Real-time data analysis. +- Cluster monitoring. +- Cluster management. +- Monitoring replicated and Kafka tables. ### LightHouse {#lighthouse} @@ -50,9 +50,9 @@ The following features are planned for development: Features: -- Table list with filtering and metadata. -- Table preview with filtering and sorting. -- Read-only queries execution. +- Table list with filtering and metadata. +- Table preview with filtering and sorting. +- Read-only queries execution. ### Redash {#redash} @@ -62,9 +62,9 @@ Supports for multiple data sources including ClickHouse, Redash can join results Features: -- Powerful editor of queries. -- Database explorer. -- Visualization tools, that allow you to represent data in different forms. +- Powerful editor of queries. +- Database explorer. +- Visualization tools, that allow you to represent data in different forms. ### Grafana {#grafana} @@ -92,10 +92,10 @@ Features: Features: -- Query development with syntax highlight and autocompletion. -- Table list with filters and metadata search. -- Table data preview. -- Full-text search. +- Query development with syntax highlight and autocompletion. +- Table list with filters and metadata search. +- Table data preview. +- Full-text search. By default, DBeaver does not connect using a session (the CLI for example does). If you require session support (for example to set settings for your session), edit the driver connection properties and set `session_id` to a random string (it uses the http connection under the hood). Then you can use any setting from the query window. 
@@ -105,10 +105,10 @@ By default, DBeaver does not connect using a session (the CLI for example does). Features: -- Autocompletion. -- Syntax highlighting for the queries and data output. -- Pager support for the data output. -- Custom PostgreSQL-like commands. +- Autocompletion. +- Syntax highlighting for the queries and data output. +- Pager support for the data output. +- Custom PostgreSQL-like commands. ### clickhouse-flamegraph {#clickhouse-flamegraph} @@ -132,15 +132,15 @@ Features: Features: -- Support query history (pagination, clear all, etc.) -- Support selected sql clauses query -- Support terminating query -- Support table management (metadata, delete, preview) -- Support database management (delete, create) -- Support custom query -- Support multiple data sources management(connection test, monitoring) -- Support monitor (processor, connection, query) -- Support migrate data +- Support query history (pagination, clear all, etc.) +- Support selected sql clauses query +- Support terminating query +- Support table management (metadata, delete, preview) +- Support database management (delete, create) +- Support custom query +- Support multiple data sources management(connection test, monitoring) +- Support monitor (processor, connection, query) +- Support migrate data ### Bytebase {#bytebase} @@ -148,13 +148,13 @@ Features: Features: -- Schema review between developers and DBAs. -- Database-as-Code, version control the schema in VCS such GitLab and trigger the deployment upon code commit. -- Streamlined deployment with per-environment policy. -- Full migration history. -- Schema drift detection. -- Backup and restore. -- RBAC. +- Schema review between developers and DBAs. +- Database-as-Code, version control the schema in VCS such GitLab and trigger the deployment upon code commit. +- Streamlined deployment with per-environment policy. +- Full migration history. +- Schema drift detection. +- Backup and restore. +- RBAC. ### Zeppelin-Interpreter-for-ClickHouse {#zeppelin-interpreter-for-clickhouse} @@ -166,9 +166,9 @@ Features: Features: -- An online SQL editor which can run your SQL code without any installing. -- You can observe all processes and mutations. For those unfinished processes, you can kill them in ui. -- The Metrics contains Cluster Analysis,Data Analysis,Query Analysis. +- An online SQL editor which can run your SQL code without any installing. +- You can observe all processes and mutations. For those unfinished processes, you can kill them in ui. +- The Metrics contains Cluster Analysis,Data Analysis,Query Analysis. ### ClickVisual {#clickvisual} @@ -218,12 +218,12 @@ Features: Features: -- Very fast code completion. -- ClickHouse syntax highlighting. -- Support for features specific to ClickHouse, for example, nested columns, table engines. -- Data Editor. -- Refactorings. -- Search and Navigation. +- Very fast code completion. +- ClickHouse syntax highlighting. +- Support for features specific to ClickHouse, for example, nested columns, table engines. +- Data Editor. +- Refactorings. +- Search and Navigation. ### Yandex DataLens {#yandex-datalens} @@ -231,15 +231,15 @@ Features: Features: -- Wide range of available visualizations, from simple bar charts to complex dashboards. -- Dashboards could be made publicly available. -- Support for multiple data sources including ClickHouse. -- Storage for materialized data based on ClickHouse. +- Wide range of available visualizations, from simple bar charts to complex dashboards. 
+- Dashboards could be made publicly available. +- Support for multiple data sources including ClickHouse. +- Storage for materialized data based on ClickHouse. DataLens is [available for free](https://cloud.yandex.com/docs/datalens/pricing) for low-load projects, even for commercial use. -- [DataLens documentation](https://cloud.yandex.com/docs/datalens/). -- [Tutorial](https://cloud.yandex.com/docs/solutions/datalens/data-from-ch-visualization) on visualizing data from a ClickHouse database. +- [DataLens documentation](https://cloud.yandex.com/docs/datalens/). +- [Tutorial](https://cloud.yandex.com/docs/solutions/datalens/data-from-ch-visualization) on visualizing data from a ClickHouse database. ### Holistics Software {#holistics-software} @@ -247,11 +247,11 @@ DataLens is [available for free](https://cloud.yandex.com/docs/datalens/pricing) Features: -- Automated email, Slack and Google Sheet schedules of reports. -- SQL editor with visualizations, version control, auto-completion, reusable query components and dynamic filters. -- Embedded analytics of reports and dashboards via iframe. -- Data preparation and ETL capabilities. -- SQL data modelling support for relational mapping of data. +- Automated email, Slack and Google Sheet schedules of reports. +- SQL editor with visualizations, version control, auto-completion, reusable query components and dynamic filters. +- Embedded analytics of reports and dashboards via iframe. +- Data preparation and ETL capabilities. +- SQL data modelling support for relational mapping of data. ### Looker {#looker} @@ -260,9 +260,9 @@ to integrate data with other applications. Features: -- Easy and agile development using LookML, a language which supports curated +- Easy and agile development using LookML, a language which supports curated [Data Modeling](https://looker.com/platform/data-modeling) to support report writers and end-users. -- Powerful workflow integration via Looker’s [Data Actions](https://looker.com/platform/actions). +- Powerful workflow integration via Looker’s [Data Actions](https://looker.com/platform/actions). [How to configure ClickHouse in Looker.](https://docs.looker.com/setup-and-management/database-config/clickhouse) @@ -272,12 +272,12 @@ Features: Features: -- Business users-friendly reports builder. -- Powerful report parameters for SQL filtering and report-specific query customizations. -- Can connect to ClickHouse both with a native TCP/IP endpoint and a HTTP(S) interface (2 different drivers). -- It is possible to use all power of ClickHouse SQL dialect in dimensions/measures definitions. -- [Web API](https://www.seektable.com/help/web-api-integration) for automated reports generation. -- Supports reports development flow with account data [backup/restore](https://www.seektable.com/help/self-hosted-backup-restore); data models (cubes) / reports configuration is a human-readable XML and can be stored under version control system. +- Business users-friendly reports builder. +- Powerful report parameters for SQL filtering and report-specific query customizations. +- Can connect to ClickHouse both with a native TCP/IP endpoint and a HTTP(S) interface (2 different drivers). +- It is possible to use all power of ClickHouse SQL dialect in dimensions/measures definitions. +- [Web API](https://www.seektable.com/help/web-api-integration) for automated reports generation. 
+- Supports reports development flow with account data [backup/restore](https://www.seektable.com/help/self-hosted-backup-restore); data models (cubes) / reports configuration is a human-readable XML and can be stored under version control system. SeekTable is [free](https://www.seektable.com/help/cloud-pricing) for personal/individual usage. diff --git a/docs/en/interfaces/third-party/index.md b/docs/en/interfaces/third-party/index.md index ad5ed0650a5..adb673d1ff8 100644 --- a/docs/en/interfaces/third-party/index.md +++ b/docs/en/interfaces/third-party/index.md @@ -8,10 +8,10 @@ sidebar_position: 24 This is a collection of links to third-party tools that provide some sort of interface to ClickHouse. It can be either visual interface, command-line interface or an API: -- [Client libraries](../../interfaces/third-party/client-libraries.md) -- [Integrations](../../interfaces/third-party/integrations.md) -- [GUI](../../interfaces/third-party/gui.md) -- [Proxies](../../interfaces/third-party/proxy.md) +- [Client libraries](../../interfaces/third-party/client-libraries.md) +- [Integrations](../../interfaces/third-party/integrations.md) +- [GUI](../../interfaces/third-party/gui.md) +- [Proxies](../../interfaces/third-party/proxy.md) :::note Generic tools that support common API like [ODBC](../../interfaces/odbc.md) or [JDBC](../../interfaces/jdbc.md) usually can work with ClickHouse as well, but are not listed here because there are way too many of them. diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index a089b3eff17..3e1b1e84f5d 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -12,104 +12,104 @@ ClickHouse, Inc. does **not** maintain the tools and libraries listed below and ## Infrastructure Products {#infrastructure-products} -- Relational database management systems - - [MySQL](https://www.mysql.com) - - [mysql2ch](https://github.com/long2ice/mysql2ch) - - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) - - [horgh-replicator](https://github.com/larsnovikov/horgh-replicator) - - [PostgreSQL](https://www.postgresql.org) - - [clickhousedb_fdw](https://github.com/Percona-Lab/clickhousedb_fdw) - - [infi.clickhouse_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - - [pg2ch](https://github.com/mkabilov/pg2ch) - - [clickhouse_fdw](https://github.com/adjust/clickhouse_fdw) - - [MSSQL](https://en.wikipedia.org/wiki/Microsoft_SQL_Server) - - [ClickHouseMigrator](https://github.com/zlzforever/ClickHouseMigrator) -- Message queues - - [Kafka](https://kafka.apache.org) - - [clickhouse_sinker](https://github.com/housepower/clickhouse_sinker) (uses [Go client](https://github.com/ClickHouse/clickhouse-go/)) - - [stream-loader-clickhouse](https://github.com/adform/stream-loader) -- Batch processing - - [Spark](https://spark.apache.org) - - [spark-clickhouse-connector](https://github.com/housepower/spark-clickhouse-connector) -- Stream processing - - [Flink](https://flink.apache.org) - - [flink-clickhouse-sink](https://github.com/ivi-ru/flink-clickhouse-sink) -- Object storages - - [S3](https://en.wikipedia.org/wiki/Amazon_S3) - - [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup) -- Container orchestration - - [Kubernetes](https://kubernetes.io) - - 
[clickhouse-operator](https://github.com/Altinity/clickhouse-operator) -- Configuration management - - [puppet](https://puppet.com) - - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) - - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) -- Monitoring - - [Graphite](https://graphiteapp.org) - - [graphouse](https://github.com/ClickHouse/graphouse) - - [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) - - [graphite-clickhouse](https://github.com/lomik/graphite-clickhouse) - - [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - optimizes staled partitions in [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) if rules from [rollup configuration](../../engines/table-engines/mergetree-family/graphitemergetree.md#rollup-configuration) could be applied - - [Grafana](https://grafana.com/) - - [clickhouse-grafana](https://github.com/Vertamedia/clickhouse-grafana) - - [Prometheus](https://prometheus.io/) - - [clickhouse_exporter](https://github.com/f1yegor/clickhouse_exporter) - - [PromHouse](https://github.com/Percona-Lab/PromHouse) - - [clickhouse_exporter](https://github.com/hot-wifi/clickhouse_exporter) (uses [Go client](https://github.com/kshvakov/clickhouse/)) - - [Nagios](https://www.nagios.org/) - - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) - - [check_clickhouse.py](https://github.com/innogames/igmonplugins/blob/master/src/check_clickhouse.py) - - [Zabbix](https://www.zabbix.com) - - [clickhouse-zabbix-template](https://github.com/Altinity/clickhouse-zabbix-template) - - [Sematext](https://sematext.com/) - - [clickhouse integration](https://github.com/sematext/sematext-agent-integrations/tree/master/clickhouse) -- Logging - - [rsyslog](https://www.rsyslog.com/) - - [omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html) - - [fluentd](https://www.fluentd.org) - - [loghouse](https://github.com/flant/loghouse) (for [Kubernetes](https://kubernetes.io)) - - [logagent](https://www.sematext.com/logagent) - - [logagent output-plugin-clickhouse](https://sematext.com/docs/logagent/output-plugin-clickhouse/) -- Geo - - [MaxMind](https://dev.maxmind.com/geoip/) - - [clickhouse-maxmind-geoip](https://github.com/AlexeyKupershtokh/clickhouse-maxmind-geoip) -- AutoML - - [MindsDB](https://mindsdb.com/) - - [MindsDB](https://github.com/mindsdb/mindsdb) - Predictive AI layer for ClickHouse database. 
+- Relational database management systems + - [MySQL](https://www.mysql.com) + - [mysql2ch](https://github.com/long2ice/mysql2ch) + - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) + - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) + - [horgh-replicator](https://github.com/larsnovikov/horgh-replicator) + - [PostgreSQL](https://www.postgresql.org) + - [clickhousedb_fdw](https://github.com/Percona-Lab/clickhousedb_fdw) + - [infi.clickhouse_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) + - [pg2ch](https://github.com/mkabilov/pg2ch) + - [clickhouse_fdw](https://github.com/adjust/clickhouse_fdw) + - [MSSQL](https://en.wikipedia.org/wiki/Microsoft_SQL_Server) + - [ClickHouseMigrator](https://github.com/zlzforever/ClickHouseMigrator) +- Message queues + - [Kafka](https://kafka.apache.org) + - [clickhouse_sinker](https://github.com/housepower/clickhouse_sinker) (uses [Go client](https://github.com/ClickHouse/clickhouse-go/)) + - [stream-loader-clickhouse](https://github.com/adform/stream-loader) +- Batch processing + - [Spark](https://spark.apache.org) + - [spark-clickhouse-connector](https://github.com/housepower/spark-clickhouse-connector) +- Stream processing + - [Flink](https://flink.apache.org) + - [flink-clickhouse-sink](https://github.com/ivi-ru/flink-clickhouse-sink) +- Object storages + - [S3](https://en.wikipedia.org/wiki/Amazon_S3) + - [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup) +- Container orchestration + - [Kubernetes](https://kubernetes.io) + - [clickhouse-operator](https://github.com/Altinity/clickhouse-operator) +- Configuration management + - [puppet](https://puppet.com) + - [innogames/clickhouse](https://forge.puppet.com/innogames/clickhouse) + - [mfedotov/clickhouse](https://forge.puppet.com/mfedotov/clickhouse) +- Monitoring + - [Graphite](https://graphiteapp.org) + - [graphouse](https://github.com/ClickHouse/graphouse) + - [carbon-clickhouse](https://github.com/lomik/carbon-clickhouse) + - [graphite-clickhouse](https://github.com/lomik/graphite-clickhouse) + - [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer) - optimizes staled partitions in [\*GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree) if rules from [rollup configuration](../../engines/table-engines/mergetree-family/graphitemergetree.md#rollup-configuration) could be applied + - [Grafana](https://grafana.com/) + - [clickhouse-grafana](https://github.com/Vertamedia/clickhouse-grafana) + - [Prometheus](https://prometheus.io/) + - [clickhouse_exporter](https://github.com/f1yegor/clickhouse_exporter) + - [PromHouse](https://github.com/Percona-Lab/PromHouse) + - [clickhouse_exporter](https://github.com/hot-wifi/clickhouse_exporter) (uses [Go client](https://github.com/kshvakov/clickhouse/)) + - [Nagios](https://www.nagios.org/) + - [check_clickhouse](https://github.com/exogroup/check_clickhouse/) + - [check_clickhouse.py](https://github.com/innogames/igmonplugins/blob/master/src/check_clickhouse.py) + - [Zabbix](https://www.zabbix.com) + - [clickhouse-zabbix-template](https://github.com/Altinity/clickhouse-zabbix-template) + - [Sematext](https://sematext.com/) + - [clickhouse integration](https://github.com/sematext/sematext-agent-integrations/tree/master/clickhouse) +- Logging + - [rsyslog](https://www.rsyslog.com/) + - 
[omclickhouse](https://www.rsyslog.com/doc/master/configuration/modules/omclickhouse.html) + - [fluentd](https://www.fluentd.org) + - [loghouse](https://github.com/flant/loghouse) (for [Kubernetes](https://kubernetes.io)) + - [logagent](https://www.sematext.com/logagent) + - [logagent output-plugin-clickhouse](https://sematext.com/docs/logagent/output-plugin-clickhouse/) +- Geo + - [MaxMind](https://dev.maxmind.com/geoip/) + - [clickhouse-maxmind-geoip](https://github.com/AlexeyKupershtokh/clickhouse-maxmind-geoip) +- AutoML + - [MindsDB](https://mindsdb.com/) + - [MindsDB](https://github.com/mindsdb/mindsdb) - Predictive AI layer for ClickHouse database. ## Programming Language Ecosystems {#programming-language-ecosystems} -- Python - - [SQLAlchemy](https://www.sqlalchemy.org) - - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - - [pandas](https://pandas.pydata.org) - - [pandahouse](https://github.com/kszucs/pandahouse) -- PHP - - [Doctrine](https://www.doctrine-project.org/) - - [dbal-clickhouse](https://packagist.org/packages/friendsofdoctrine/dbal-clickhouse) -- R - - [dplyr](https://db.rstudio.com/dplyr/) - - [RClickHouse](https://github.com/IMSMWU/RClickHouse) (uses [clickhouse-cpp](https://github.com/artpaul/clickhouse-cpp)) -- Java - - [Hadoop](http://hadoop.apache.org) - - [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (uses [JDBC](../../sql-reference/table-functions/jdbc.md)) -- Scala - - [Akka](https://akka.io) - - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) -- C# - - [ADO.NET](https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/ado-net-overview) - - [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net) - - [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client) - - [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net) - - [ClickHouse.Net.Migrations](https://github.com/ilyabreev/ClickHouse.Net.Migrations) - - [Linq To DB](https://github.com/linq2db/linq2db) -- Elixir - - [Ecto](https://github.com/elixir-ecto/ecto) - - [clickhouse_ecto](https://github.com/appodeal/clickhouse_ecto) -- Ruby - - [Ruby on Rails](https://rubyonrails.org/) - - [activecube](https://github.com/bitquery/activecube) - - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) - - [GraphQL](https://github.com/graphql) - - [activecube-graphql](https://github.com/bitquery/activecube-graphql) +- Python + - [SQLAlchemy](https://www.sqlalchemy.org) + - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) + - [pandas](https://pandas.pydata.org) + - [pandahouse](https://github.com/kszucs/pandahouse) +- PHP + - [Doctrine](https://www.doctrine-project.org/) + - [dbal-clickhouse](https://packagist.org/packages/friendsofdoctrine/dbal-clickhouse) +- R + - [dplyr](https://db.rstudio.com/dplyr/) + - [RClickHouse](https://github.com/IMSMWU/RClickHouse) (uses [clickhouse-cpp](https://github.com/artpaul/clickhouse-cpp)) +- Java + - [Hadoop](http://hadoop.apache.org) + - [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (uses [JDBC](../../sql-reference/table-functions/jdbc.md)) +- Scala + - [Akka](https://akka.io) + - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) +- C# + - 
[ADO.NET](https://docs.microsoft.com/en-us/dotnet/framework/data/adonet/ado-net-overview) + - [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net) + - [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client) + - [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net) + - [ClickHouse.Net.Migrations](https://github.com/ilyabreev/ClickHouse.Net.Migrations) + - [Linq To DB](https://github.com/linq2db/linq2db) +- Elixir + - [Ecto](https://github.com/elixir-ecto/ecto) + - [clickhouse_ecto](https://github.com/appodeal/clickhouse_ecto) +- Ruby + - [Ruby on Rails](https://rubyonrails.org/) + - [activecube](https://github.com/bitquery/activecube) + - [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord) + - [GraphQL](https://github.com/graphql) + - [activecube-graphql](https://github.com/bitquery/activecube-graphql) diff --git a/docs/en/interfaces/third-party/proxy.md b/docs/en/interfaces/third-party/proxy.md index 4fd3f31f08b..43063ea4c1d 100644 --- a/docs/en/interfaces/third-party/proxy.md +++ b/docs/en/interfaces/third-party/proxy.md @@ -12,9 +12,9 @@ sidebar_label: Proxies Features: -- Per-user routing and response caching. -- Flexible limits. -- Automatic SSL certificate renewal. +- Per-user routing and response caching. +- Flexible limits. +- Automatic SSL certificate renewal. Implemented in Go. @@ -24,9 +24,9 @@ Implemented in Go. Features: -- In-memory and on-disk data buffering. -- Per-table routing. -- Load-balancing and health checking. +- In-memory and on-disk data buffering. +- Per-table routing. +- Load-balancing and health checking. Implemented in Go. @@ -36,8 +36,8 @@ Implemented in Go. Features: -- Group requests and send by threshold or interval. -- Multiple remote servers. -- Basic authentication. +- Group requests and send by threshold or interval. +- Multiple remote servers. +- Basic authentication. Implemented in Go. diff --git a/docs/en/operations/_troubleshooting.md b/docs/en/operations/_troubleshooting.md index a5c07ed18bd..dbb0dad7976 100644 --- a/docs/en/operations/_troubleshooting.md +++ b/docs/en/operations/_troubleshooting.md @@ -1,17 +1,17 @@ [//]: # (This file is included in FAQ > Troubleshooting) -- [Installation](#troubleshooting-installation-errors) -- [Connecting to the server](#troubleshooting-accepts-no-connections) -- [Query processing](#troubleshooting-does-not-process-queries) -- [Efficiency of query processing](#troubleshooting-too-slow) +- [Installation](#troubleshooting-installation-errors) +- [Connecting to the server](#troubleshooting-accepts-no-connections) +- [Query processing](#troubleshooting-does-not-process-queries) +- [Efficiency of query processing](#troubleshooting-too-slow) ## Installation {#troubleshooting-installation-errors} ### You Cannot Get Deb Packages from ClickHouse Repository with Apt-get {#you-cannot-get-deb-packages-from-clickhouse-repository-with-apt-get} -- Check firewall settings. -- If you cannot access the repository for any reason, download packages as described in the [install guide](../getting-started/install.md) article and install them manually using the `sudo dpkg -i <packages>` command. You will also need the `tzdata` package. +- Check firewall settings. +- If you cannot access the repository for any reason, download packages as described in the [install guide](../getting-started/install.md) article and install them manually using the `sudo dpkg -i <packages>` command. You will also need the `tzdata` package.
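+
+For example, a minimal sketch of such a manual installation (the package file names and version globs below are placeholders):
+
+```bash
+# Install the tzdata dependency first, then the downloaded ClickHouse packages.
+sudo apt-get install tzdata
+sudo dpkg -i clickhouse-common-static_*.deb clickhouse-server_*.deb clickhouse-client_*.deb
+```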
### You Cannot Update Deb Packages from ClickHouse Repository with Apt-get {#you-cannot-update-deb-packages-from-clickhouse-repository-with-apt-get} @@ -73,8 +73,8 @@ After that follow the [install guide](../getting-started/install.md#from-rpm-pac Possible issues: -- The server is not running. -- Unexpected or wrong configuration parameters. +- The server is not running. +- Unexpected or wrong configuration parameters. ### Server Is Not Running {#server-is-not-running} @@ -98,8 +98,8 @@ The main log of `clickhouse-server` is in `/var/log/clickhouse-server/clickhouse If the server started successfully, you should see the strings: -- `<Information> Application: starting up.` — Server started. -- `<Information> Application: Ready for connections.` — Server is running and ready for connections. +- `<Information> Application: starting up.` — Server started. +- `<Information> Application: Ready for connections.` — Server is running and ready for connections. If `clickhouse-server` start failed with a configuration error, you should see the `<Error>` string with an error description. For example: @@ -149,30 +149,30 @@ This command starts the server as an interactive app with standard parameters of Check: -- Docker settings. +- Docker settings. If you run ClickHouse in Docker in an IPv6 network, make sure that `network=host` is set. -- Endpoint settings. +- Endpoint settings. Check [listen_host](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-listen_host) and [tcp_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) settings. ClickHouse server accepts localhost connections only by default. -- HTTP protocol settings. +- HTTP protocol settings. Check protocol settings for the HTTP API. -- Secure connection settings. +- Secure connection settings. Check: - - The [tcp_port_secure](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) setting. - - Settings for [SSL certificates](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl). + - The [tcp_port_secure](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) setting. + - Settings for [SSL certificates](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-openssl). Use proper parameters while connecting. For example, use the `port_secure` parameter with `clickhouse_client`. -- User settings. +- User settings. You might be using the wrong user name or password. diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md index 2e241ec1980..b3583e156ad 100644 --- a/docs/en/operations/configuration-files.md +++ b/docs/en/operations/configuration-files.md @@ -14,10 +14,10 @@ All XML files should have the same root element, usually `<clickhouse>`. As for Some settings specified in the main configuration file can be overridden in other configuration files: -- The `replace` or `remove` attributes can be specified for the elements of these configuration files. -- If neither is specified, it combines the contents of elements recursively, replacing values of duplicate children. -- If `replace` is specified, it replaces the entire element with the specified one. -- If `remove` is specified, it deletes the element. +- The `replace` or `remove` attributes can be specified for the elements of these configuration files. +- If neither is specified, it combines the contents of elements recursively, replacing values of duplicate children. +- If `replace` is specified, it replaces the entire element with the specified one. +- If `remove` is specified, it deletes the element. You can also declare attributes as coming from environment variables by using `from_env="VARIABLE_NAME"`:
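+
+For instance, a minimal sketch of an override file (the element names are illustrative, not taken from a real configuration):
+
+```xml
+<clickhouse>
+    <!-- Replaces the whole <users> element instead of merging it. -->
+    <users replace="replace">
+    </users>
+    <!-- Deletes the <graphite> element. -->
+    <graphite remove="remove"/>
+    <!-- Reads the value of max_query_size from an environment variable. -->
+    <max_query_size from_env="MAX_QUERY_SIZE"/>
+</clickhouse>
+```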
diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md index 04c5840d514..ebf981690a9 100644 --- a/docs/en/operations/monitoring.md +++ b/docs/en/operations/monitoring.md @@ -12,15 +12,15 @@ import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md You can monitor: -- Utilization of hardware resources. -- ClickHouse server metrics. +- Utilization of hardware resources. +- ClickHouse server metrics. ## Resource Utilization {#resource-utilization} ClickHouse also monitors the state of hardware resources by itself, such as: -- Load and temperature on processors. -- Utilization of storage system, RAM and network. +- Load and temperature on processors. +- Utilization of storage system, RAM and network. This data is collected in the `system.asynchronous_metric_log` table. @@ -32,8 +32,8 @@ To track server events use server logs. See the [logger](../operations/server-co ClickHouse collects: -- Different metrics of how the server uses computational resources. -- Common statistics on query processing. +- Different metrics of how the server uses computational resources. +- Common statistics on query processing. You can find metrics in the [system.metrics](../operations/system-tables/metrics.md#system_tables-metrics), [system.events](../operations/system-tables/events.md#system_tables-events), and [system.asynchronous_metrics](../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) tables. diff --git a/docs/en/operations/opentelemetry.md b/docs/en/operations/opentelemetry.md index 15185f7ae6b..70f64d08ba3 100644 --- a/docs/en/operations/opentelemetry.md +++ b/docs/en/operations/opentelemetry.md @@ -61,3 +61,7 @@ FROM system.opentelemetry_span_log ``` In case of any errors, the part of the log data for which the error has occurred will be silently lost. Check the server log for error messages if the data does not arrive.
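+
+For example, a minimal sketch for inspecting the most recently recorded spans (the columns used here follow the `system.opentelemetry_span_log` schema):
+
+```sql
+-- Look at the latest spans the server has recorded.
+SELECT trace_id, operation_name, start_time_us, finish_time_us
+FROM system.opentelemetry_span_log
+ORDER BY finish_time_us DESC
+LIMIT 10
+```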
+ +## Related Content + +- Blog: [Building an Observability Solution with ClickHouse - Part 2 - Traces](https://clickhouse.com/blog/storing-traces-and-spans-open-telemetry-in-clickhouse) diff --git a/docs/en/operations/optimizing-performance/index.md b/docs/en/operations/optimizing-performance/index.md index e25f3b4adb7..83e9430ed27 100644 --- a/docs/en/operations/optimizing-performance/index.md +++ b/docs/en/operations/optimizing-performance/index.md @@ -6,4 +6,4 @@ sidebar_position: 52 # Optimizing Performance -- [Sampling query profiler](../../operations/optimizing-performance/sampling-query-profiler.md) +- [Sampling query profiler](../../operations/optimizing-performance/sampling-query-profiler.md) diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md index ab42eec4190..f5d0e5d6aed 100644 --- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md @@ -25,11 +25,11 @@ SETTINGS allow_introspection_functions = 1 In self-managed deployments, to use query profiler: -- Setup the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration. +- Set up the [trace_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) section of the server configuration. This section configures the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table containing the results of the profiler functioning. It is configured by default. Remember that data in this table is valid only for a running server. After the server restart, ClickHouse does not clean up the table and all the stored virtual memory addresses may become invalid. -- Setup the [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) or [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously. +- Set up the [query_profiler_cpu_time_period_ns](../../operations/settings/settings.md#query_profiler_cpu_time_period_ns) or [query_profiler_real_time_period_ns](../../operations/settings/settings.md#query_profiler_real_time_period_ns) settings. Both settings can be used simultaneously. These settings allow you to configure profiler timers. As these are session settings, you can get different sampling frequencies for the whole server, individual users or user profiles, for your interactive session, and for each individual query. @@ -37,13 +37,13 @@ The default sampling frequency is one sample per second and both CPU and real ti To analyze the `trace_log` system table: -- Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages). +- Install the `clickhouse-common-static-dbg` package. See [Install from DEB Packages](../../getting-started/install.md#install-from-deb-packages). -- Allow introspection functions by the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting. +- Allow introspection functions by the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting. For security reasons, introspection functions are disabled by default. -- Use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by the whole stack traces. +- Use the `addressToLine`, `addressToLineWithInlines`, `addressToSymbol` and `demangle` [introspection functions](../../sql-reference/functions/introspection.md) to get function names and their positions in ClickHouse code. To get a profile for some query, you need to aggregate data from the `trace_log` table. You can aggregate data by individual functions or by whole stack traces, as in the sketch below.
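+
+For instance, a minimal sketch of such an aggregation (the `query_id` value is a placeholder; `allow_introspection_functions` must be enabled):
+
+```sql
+-- Top stack traces for one query, symbolized with introspection functions.
+SELECT
+    count() AS samples,
+    arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS stack
+FROM system.trace_log
+WHERE query_id = 'your-query-id'  -- placeholder
+GROUP BY trace
+ORDER BY samples DESC
+LIMIT 10
+```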
If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/third-party/gui.md#clickhouse-flamegraph-clickhouse-flamegraph) and [speedscope](https://github.com/laplab/clickhouse-speedscope). @@ -51,14 +51,14 @@ If you need to visualize `trace_log` info, try [flamegraph](../../interfaces/thi In this example we: -- Filtering `trace_log` data by a query identifier and the current date. +- Filter `trace_log` data by a query identifier and the current date. -- Aggregating by stack trace. +- Aggregate by stack trace. -- Using introspection functions, we will get a report of: +- Use introspection functions to get a report of: - - Names of symbols and corresponding source code functions. - - Source code locations of these functions. + - Names of symbols and corresponding source code functions. + - Source code locations of these functions. diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index 7b106909cf0..50f73e2c1bb 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -88,6 +88,33 @@ If the query was aborted due to an exception or user cancellation, no entry is w The size of the query cache in bytes, the maximum number of cache entries and the maximum size of individual cache entries (in bytes and in records) can be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache). +It is also possible to limit the cache usage of individual users using [settings profiles](settings/settings-profiles.md) and [settings +constraints](settings/constraints-on-settings.md). More specifically, you can restrict the maximum amount of memory (in bytes) a user may +allocate in the query cache and the maximum number of stored query results. For that, first provide the settings +[query_cache_max_size_in_bytes](settings/settings.md#query-cache-max-size-in-bytes) and +[query_cache_max_entries](settings/settings.md#query-cache-size-max-items) in a user profile in `users.xml`, then make both settings +readonly:
+
+``` xml
+<profiles>
+    <default>
+        <query_cache_max_size_in_bytes>10000</query_cache_max_size_in_bytes>
+        <query_cache_max_entries>100</query_cache_max_entries>
+        <constraints>
+            <query_cache_max_size_in_bytes>
+                <readonly/>
+            </query_cache_max_size_in_bytes>
+            <query_cache_max_entries>
+                <readonly/>
+            </query_cache_max_entries>
+        </constraints>
+    </default>
+</profiles>
+```
+
To define how long a query must run at least such that its result can be cached, you can use setting [query_cache_min_query_duration](settings/settings.md#query-cache-min-query-duration). For example, the result of query @@ -124,3 +151,7 @@ Finally, entries in the query cache are not shared between users due to security row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can be marked accessible by other users (i.e. shared) by supplying setting [query_cache_share_between_users](settings/settings.md#query-cache-share-between-users).
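+
+For example, a short usage sketch (the table name `hits` is a placeholder; `use_query_cache` enables the cache for a query):
+
+```sql
+-- The first run computes and caches the result; identical later runs
+-- within the cache TTL can be answered from the query cache.
+SELECT count() FROM hits SETTINGS use_query_cache = true
+```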
+ +## Related Content + +- Blog: [Introducing the ClickHouse Query Cache](https://clickhouse.com/blog/introduction-to-the-clickhouse-query-cache-and-design) diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md index f1f3ca78802..2f0cdec0983 100644 --- a/docs/en/operations/quotas.md +++ b/docs/en/operations/quotas.md @@ -12,8 +12,8 @@ The system also has a feature for limiting the complexity of a single query. See In contrast to query complexity restrictions, quotas: -- Place restrictions on a set of queries that can be run over a period of time, instead of limiting a single query. -- Account for resources spent on all remote servers for distributed query processing. +- Place restrictions on a set of queries that can be run over a period of time, instead of limiting a single query. +- Account for resources spent on all remote servers for distributed query processing. Let’s look at the section of the ‘users.xml’ file that defines quotas. diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 7c97d0ab640..02145a2fb6c 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -45,17 +45,17 @@ Configuration template: `<compression>` fields: -- `min_part_size` – The minimum size of a data part. -- `min_part_size_ratio` – The ratio of the data part size to the table size. -- `method` – Compression method. Acceptable values: `lz4`, `lz4hc`, `zstd`,`deflate_qpl`. -- `level` – Compression level. See [Codecs](../../sql-reference/statements/create/table.md#create-query-general-purpose-codecs). +- `min_part_size` – The minimum size of a data part. +- `min_part_size_ratio` – The ratio of the data part size to the table size. +- `method` – Compression method. Acceptable values: `lz4`, `lz4hc`, `zstd`, `deflate_qpl`. +- `level` – Compression level. See [Codecs](../../sql-reference/statements/create/table.md#create-query-general-purpose-codecs). You can configure multiple `<case>` sections. Actions when conditions are met: -- If a data part matches a condition set, ClickHouse uses the specified compression method. -- If a data part matches multiple condition sets, ClickHouse uses the first matched condition set. +- If a data part matches a condition set, ClickHouse uses the specified compression method. +- If a data part matches multiple condition sets, ClickHouse uses the first matched condition set. If no conditions are met for a data part, ClickHouse uses the `lz4` compression. @@ -165,7 +165,7 @@ List of prefixes for [custom settings](../../operations/settings/index.md#custom **See Also** -- [Custom settings](../../operations/settings/index.md#custom_settings) +- [Custom settings](../../operations/settings/index.md#custom_settings) ## core_dump {#server_configuration_parameters-core_dump} @@ -173,7 +173,7 @@ Configures soft limit for core dump file size. Possible values: -- Positive integer. +- Positive integer. Default value: `1073741824` (1 GB). @@ -274,8 +274,8 @@ The path to the config file for dictionaries. Path: -- Specify the absolute path or the path relative to the server config file. -- The path can contain wildcards \* and ?. +- Specify the absolute path or the path relative to the server config file. +- The path can contain wildcards \* and ?.
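+
+For example, a minimal sketch (the wildcard pattern is illustrative):
+
+```xml
+<!-- Load every config file matching *_dictionary.xml, relative to the server config. -->
+<dictionaries_config>*_dictionary.xml</dictionaries_config>
+```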
See also “[Dictionaries](../../sql-reference/dictionaries/index.md)”. @@ -291,8 +291,8 @@ The path to the config file for executable user defined functions. Path: -- Specify the absolute path or the path relative to the server config file. -- The path can contain wildcards \* and ?. +- Specify the absolute path or the path relative to the server config file. +- The path can contain wildcards \* and ?. See also “[Executable User Defined Functions](../../sql-reference/functions/index.md#executable-user-defined-functions)”. @@ -335,15 +335,15 @@ Sending data to [Graphite](https://github.com/graphite-project). Settings: -- host – The Graphite server. -- port – The port on the Graphite server. -- interval – The interval for sending, in seconds. -- timeout – The timeout for sending data, in seconds. -- root_path – Prefix for keys. -- metrics – Sending data from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table. -- events – Sending deltas data accumulated for the time period from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. -- events_cumulative – Sending cumulative data from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. -- asynchronous_metrics – Sending data from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table. +- host – The Graphite server. +- port – The port on the Graphite server. +- interval – The interval for sending, in seconds. +- timeout – The timeout for sending data, in seconds. +- root_path – Prefix for keys. +- metrics – Sending data from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table. +- events – Sending deltas data accumulated for the time period from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. +- events_cumulative – Sending cumulative data from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. +- asynchronous_metrics – Sending data from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table. You can configure multiple `<graphite>` clauses. For instance, you can use this for sending different data at different intervals. @@ -516,10 +516,10 @@ These credentials are common for replication via `HTTP` and `HTTPS`. The section contains the following parameters: -- `user` — Username. -- `password` — Password. -- `allow_empty` — If `true`, then other replicas are allowed to connect without authentication even if credentials are set. If `false`, then connections without authentication are refused. Default value: `false`. -- `old` — Contains old `user` and `password` used during credential rotation. Several `old` sections can be specified. +- `user` — Username. +- `password` — Password. +- `allow_empty` — If `true`, then other replicas are allowed to connect without authentication even if credentials are set. If `false`, then connections without authentication are refused. Default value: `false`. +- `old` — Contains old `user` and `password` used during credential rotation. Several `old` sections can be specified. **Credentials Rotation** @@ -584,12 +584,12 @@ Backlog (queue size of pending connections) of the listen socket. Default value: `4096` (as in linux [5.4+](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=19f92a030ca6d772ab44b22ee6a01378a8cb32d4)).
Usually this value does not need to be changed, since: -- default value is large enough, -- and for accepting client's connections server has separate thread. +- the default value is large enough, +- and the server has a separate thread for accepting client connections. So even if you have `TcpExtListenOverflows` (from `nstat`) non zero and this counter grows for ClickHouse server it does not mean that this value needs to be increased, since: -- usually if 4096 is not enough it shows some internal ClickHouse scaling issue, so it is better to report an issue. -- and it does not mean that the server can handle more connections later (and even if it could, by that moment clients may be gone or disconnected). +- usually, if 4096 is not enough, it indicates some internal ClickHouse scaling issue, so it is better to report an issue. +- and it does not mean that the server can handle more connections later (and even if it could, by that moment clients may be gone or disconnected). Examples: @@ -603,13 +603,13 @@ Logging settings. Keys: -- `level` – Logging level. Acceptable values: `trace`, `debug`, `information`, `warning`, `error`. -- `log` – The log file. Contains all the entries according to `level`. -- `errorlog` – Error log file. -- `size` – Size of the file. Applies to `log` and `errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place. -- `count` – The number of archived log files that ClickHouse stores. -- `console` – Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`. -- `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`. +- `level` – Logging level. Acceptable values: `trace`, `debug`, `information`, `warning`, `error`. +- `log` – The log file. Contains all the entries according to `level`. +- `errorlog` – Error log file. +- `size` – Size of the file. Applies to `log` and `errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place. +- `count` – The number of archived log files that ClickHouse stores. +- `console` – Send `log` and `errorlog` to the console instead of a file. To enable, set to `1` or `true`. +- `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`. **Example** @@ -649,12 +649,12 @@ Writing to the syslog is also supported. Config example: Keys for syslog: -- use_syslog — Required setting if you want to write to the syslog. -- address — The host\[:port\] of syslogd. If omitted, the local daemon is used. -- hostname — Optional. The name of the host that logs are sent from. -- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) in uppercase letters with the “LOG_” prefix: (`LOG_USER`, `LOG_DAEMON`, `LOG_LOCAL3`, and so on). +- use_syslog — Required setting if you want to write to the syslog. +- address — The host\[:port\] of syslogd. If omitted, the local daemon is used. +- hostname — Optional. The name of the host that logs are sent from. +- facility — [The syslog facility keyword](https://en.wikipedia.org/wiki/Syslog#Facility) in uppercase letters with the “LOG_” prefix: (`LOG_USER`, `LOG_DAEMON`, `LOG_LOCAL3`, and so on). Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON` otherwise. -- format – Message format. Possible values: `bsd` and `syslog.` +- format – Message format. Possible values: `bsd` and `syslog`.
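+
+For reference, a minimal sketch of a syslog configuration assembled from the keys above (the address, hostname, and facility values are illustrative):
+
+```xml
+<logger>
+    <use_syslog>1</use_syslog>
+    <syslog>
+        <address>syslog.remote:10514</address>
+        <hostname>myhost.local</hostname>
+        <facility>LOG_LOCAL6</facility>
+        <format>syslog</format>
+    </syslog>
+</logger>
+```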
## send_crash_reports {#server_configuration_parameters-send_crash_reports} @@ -665,13 +665,13 @@ The server will need access to the public Internet via IPv4 (at the time of writ Keys: -- `enabled` – Boolean flag to enable the feature, `false` by default. Set to `true` to allow sending crash reports. -- `endpoint` – You can override the Sentry endpoint URL for sending crash reports. It can be either a separate Sentry account or your self-hosted Sentry instance. Use the [Sentry DSN](https://docs.sentry.io/error-reporting/quickstart/?platform=native#configure-the-sdk) syntax. -- `anonymize` - Avoid attaching the server hostname to the crash report. -- `http_proxy` - Configure HTTP proxy for sending crash reports. -- `debug` - Sets the Sentry client into debug mode. -- `tmp_path` - Filesystem path for temporary crash report state. -- `environment` - An arbitrary name of an environment in which the ClickHouse server is running. It will be mentioned in each crash report. The default value is `test` or `prod` depending on the version of ClickHouse. +- `enabled` – Boolean flag to enable the feature, `false` by default. Set to `true` to allow sending crash reports. +- `endpoint` – You can override the Sentry endpoint URL for sending crash reports. It can be either a separate Sentry account or your self-hosted Sentry instance. Use the [Sentry DSN](https://docs.sentry.io/error-reporting/quickstart/?platform=native#configure-the-sdk) syntax. +- `anonymize` - Avoid attaching the server hostname to the crash report. +- `http_proxy` - Configure HTTP proxy for sending crash reports. +- `debug` - Sets the Sentry client into debug mode. +- `tmp_path` - Filesystem path for temporary crash report state. +- `environment` - An arbitrary name of an environment in which the ClickHouse server is running. It will be mentioned in each crash report. The default value is `test` or `prod` depending on the version of ClickHouse. **Recommended way to use** @@ -713,8 +713,8 @@ Limits total RAM usage by the ClickHouse server. Possible values: -- Positive integer. -- 0 — Auto. +- Positive integer. +- 0 — Auto. Default value: `0`. @@ -724,8 +724,8 @@ The default `max_server_memory_usage` value is calculated as `memory_amount * ma **See also** -- [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) -- [max_server_memory_usage_to_ram_ratio](#max_server_memory_usage_to_ram_ratio) +- [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) +- [max_server_memory_usage_to_ram_ratio](#max_server_memory_usage_to_ram_ratio) ## max_server_memory_usage_to_ram_ratio {#max_server_memory_usage_to_ram_ratio} @@ -733,8 +733,8 @@ Defines the fraction of total physical RAM amount, available to the ClickHouse s Possible values: -- Positive double. -- 0 — The ClickHouse server can use all available RAM. +- Positive double. +- 0 — The ClickHouse server can use all available RAM. Default value: `0.9`. @@ -750,21 +750,21 @@ On hosts with low RAM and swap, you possibly need setting `max_server_memory_usa **See Also** -- [max_server_memory_usage](#max_server_memory_usage) +- [max_server_memory_usage](#max_server_memory_usage) ## concurrent_threads_soft_limit_num {#concurrent_threads_soft_limit_num} The maximum number of query processing threads, excluding threads for retrieving data from remote servers, allowed to run all queries. This is not a hard limit. If the limit is reached, the query will still get at least one thread to run.
Query can upscale to desired number of threads during execution if more threads become available. Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0`. **See Also** -- [Concurrency Control](/docs/en/development/architecture.md#concurrency-control) +- [Concurrency Control](/docs/en/development/architecture.md#concurrency-control) ## concurrent_threads_soft_limit_ratio_to_cores {#concurrent_threads_soft_limit_ratio_to_cores} The maximum number of query processing threads as multiple of number of logical cores. @@ -772,8 +772,8 @@ More details: [concurrent_threads_soft_limit_num](#concurrent_threads_soft_limit Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0`. @@ -794,8 +794,8 @@ These settings can be modified at runtime and will take effect immediately. Quer Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `100`. @@ -815,8 +815,8 @@ These settings can be modified at runtime and will take effect immediately. Quer Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0`. @@ -836,8 +836,8 @@ These settings can be modified at runtime and will take effect immediately. Quer Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0`. @@ -853,8 +853,8 @@ The maximum number of simultaneously processed queries related to MergeTree tabl Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0`. @@ -874,8 +874,8 @@ Modifying the setting for one query or user does not affect other queries. Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0`. @@ -887,7 +887,7 @@ Default value: `0`. **See Also** -- [max_concurrent_queries](#max-concurrent-queries) +- [max_concurrent_queries](#max-concurrent-queries) ## max_connections {#max-connections} @@ -937,7 +937,7 @@ ClickHouse uses threads from the Global Thread pool to process queries. If there Possible values: -- Positive integer. +- Positive integer. Default value: `10000`. @@ -953,7 +953,7 @@ If the number of **idle** threads in the Global Thread pool is greater than `max Possible values: -- Positive integer. +- Positive integer. Default value: `1000`. @@ -969,8 +969,8 @@ The maximum number of jobs that can be scheduled on the Global Thread pool. Incr Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `10000`. @@ -986,7 +986,7 @@ ClickHouse uses threads from the IO Thread pool to do some IO operations (e.g. t Possible values: -- Positive integer. +- Positive integer. Default value: `100`. @@ -996,7 +996,7 @@ If the number of **idle** threads in the IO Thread pool exceeds `max_io_thread_p Possible values: -- Positive integer. +- Positive integer. Default value: `0`. @@ -1006,8 +1006,8 @@ The maximum number of jobs that can be scheduled on the IO Thread pool. Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `10000`. @@ -1017,7 +1017,7 @@ ClickHouse uses threads from the Backups IO Thread pool to do S3 backup IO opera Possible values: -- Positive integer. +- Positive integer. Default value: `1000`. @@ -1027,8 +1027,8 @@ If the number of **idle** threads in the Backups IO Thread pool exceeds `max_bac Possible values: -- Positive integer. -- Zero. 
+- Positive integer. +- Zero. Default value: `0`. @@ -1038,8 +1038,8 @@ The maximum number of jobs that can be scheduled on the Backups IO Thread pool. Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0`. @@ -1051,7 +1051,7 @@ Before changing it, please also take a look at related MergeTree settings, such Possible values: -- Any positive integer. +- Any positive integer. Default value: 16. @@ -1069,7 +1069,7 @@ The same as for `background_pool_size` setting `background_merges_mutations_conc Possible values: -- Any positive integer. +- Any positive integer. Default value: 2. @@ -1086,8 +1086,8 @@ Could be applied from the `default` profile for backward compatibility. Possible values: -- "round_robin" — Every concurrent merge and mutation is executed in round-robin order to ensure starvation-free operation. Smaller merges are completed faster than bigger ones just because they have fewer blocks to merge. -- "shortest_task_first" — Always execute smaller merge or mutation. Merges and mutations are assigned priorities based on their resulting size. Merges with smaller sizes are strictly preferred over bigger ones. This policy ensures the fastest possible merge of small parts but can lead to indefinite starvation of big merges in partitions heavily overloaded by INSERTs. +- "round_robin" — Every concurrent merge and mutation is executed in round-robin order to ensure starvation-free operation. Smaller merges are completed faster than bigger ones just because they have fewer blocks to merge. +- "shortest_task_first" — Always execute smaller merge or mutation. Merges and mutations are assigned priorities based on their resulting size. Merges with smaller sizes are strictly preferred over bigger ones. This policy ensures the fastest possible merge of small parts but can lead to indefinite starvation of big merges in partitions heavily overloaded by INSERTs. Default value: "round_robin". @@ -1103,7 +1103,7 @@ Sets the number of threads performing background moves for tables with MergeTree Possible values: -- Any positive integer. +- Any positive integer. Default value: 8. @@ -1119,7 +1119,7 @@ Sets the number of threads performing background fetches for tables with Replica Possible values: -- Any positive integer. +- Any positive integer. Default value: 8. @@ -1135,7 +1135,7 @@ Sets the number of threads performing background non-specialized operations like Possible values: -- Any positive integer. +- Any positive integer. Default value: 8. @@ -1151,7 +1151,7 @@ Sets the number of threads performing background flush in [Buffer](../../engines Possible values: -- Any positive integer. +- Any positive integer. Default value: 16. @@ -1161,7 +1161,7 @@ Sets the number of threads performing background tasks for [replicated](../../en Possible values: -- Any positive integer. +- Any positive integer. Default value: 128. @@ -1233,26 +1233,26 @@ Support for SSL is provided by the `libpoco` library. The available configuratio Keys for server/client settings: -- privateKeyFile – The path to the file with the secret key of the PEM certificate. The file may contain a key and certificate at the same time. -- certificateFile – The path to the client/server certificate file in PEM format. You can omit it if `privateKeyFile` contains the certificate. -- caConfig (default: none) – The path to the file or directory that contains trusted CA certificates. If this points to a file, it must be in PEM format and can contain several CA certificates. 
If this points to a directory, it must contain one .pem file per CA certificate. The filenames are looked up by the CA subject name hash value. Details can be found in the man page of [SSL_CTX_load_verify_locations](https://www.openssl.org/docs/man3.0/man3/SSL_CTX_load_verify_locations.html). -- verificationMode (default: relaxed) – The method for checking the node’s certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`. -- verificationDepth (default: 9) – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. -- loadDefaultCAFile (default: true) – Wether built-in CA certificates for OpenSSL will be used. ClickHouse assumes that builtin CA certificates are in the file `/etc/ssl/cert.pem` (resp. the directory `/etc/ssl/certs`) or in file (resp. directory) specified by the environment variable `SSL_CERT_FILE` (resp. `SSL_CERT_DIR`). -- cipherList (default: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`) - Supported OpenSSL encryptions. -- cacheSessions (default: false) – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`. -- sessionIdContext (default: `${application.name}`) – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`. -- sessionCacheSize (default: [1024\*20](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1978)) – The maximum number of sessions that the server caches. A value of 0 means unlimited sessions. -- sessionTimeout (default: [2h](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1926)) – Time for caching the session on the server. -- extendedVerification (default: false) – If enabled, verify that the certificate CN or SAN matches the peer hostname. -- requireTLSv1 (default: false) – Require a TLSv1 connection. Acceptable values: `true`, `false`. -- requireTLSv1_1 (default: false) – Require a TLSv1.1 connection. Acceptable values: `true`, `false`. -- requireTLSv1_2 (default: false) – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. -- fips (default: false) – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS. -- privateKeyPassphraseHandler (default: `KeyConsoleHandler`)– Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: `<privateKeyPassphraseHandler>`, `<name>KeyFileHandler</name>`, `<options><password>test</password></options>`, `</privateKeyPassphraseHandler>`. -- invalidCertificateHandler (default: `ConsoleCertificateHandler`) – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: `<invalidCertificateHandler> <name>ConsoleCertificateHandler</name> </invalidCertificateHandler>` . -- disableProtocols (default: "") – Protocols that are not allowed to use. -- preferServerCiphers (default: false) – Preferred server ciphers on the client. +- privateKeyFile – The path to the file with the secret key of the PEM certificate. The file may contain a key and certificate at the same time. +- certificateFile – The path to the client/server certificate file in PEM format. You can omit it if `privateKeyFile` contains the certificate.
+- caConfig (default: none) – The path to the file or directory that contains trusted CA certificates. If this points to a file, it must be in PEM format and can contain several CA certificates. If this points to a directory, it must contain one .pem file per CA certificate. The filenames are looked up by the CA subject name hash value. Details can be found in the man page of [SSL_CTX_load_verify_locations](https://www.openssl.org/docs/man3.0/man3/SSL_CTX_load_verify_locations.html). +- verificationMode (default: relaxed) – The method for checking the node’s certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`. +- verificationDepth (default: 9) – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. +- loadDefaultCAFile (default: true) – Whether built-in CA certificates for OpenSSL will be used. ClickHouse assumes that built-in CA certificates are in the file `/etc/ssl/cert.pem` (resp. the directory `/etc/ssl/certs`) or in the file (resp. directory) specified by the environment variable `SSL_CERT_FILE` (resp. `SSL_CERT_DIR`). +- cipherList (default: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`) - Supported OpenSSL encryptions. +- cacheSessions (default: false) – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`. +- sessionIdContext (default: `${application.name}`) – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`. +- sessionCacheSize (default: [1024\*20](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1978)) – The maximum number of sessions that the server caches. A value of 0 means unlimited sessions. +- sessionTimeout (default: [2h](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1926)) – Time for caching the session on the server. +- extendedVerification (default: false) – If enabled, verify that the certificate CN or SAN matches the peer hostname. +- requireTLSv1 (default: false) – Require a TLSv1 connection. Acceptable values: `true`, `false`. +- requireTLSv1_1 (default: false) – Require a TLSv1.1 connection. Acceptable values: `true`, `false`. +- requireTLSv1_2 (default: false) – Require a TLSv1.2 connection. Acceptable values: `true`, `false`. +- fips (default: false) – Activates OpenSSL FIPS mode. Supported if the library’s OpenSSL version supports FIPS. +- privateKeyPassphraseHandler (default: `KeyConsoleHandler`) – Class (PrivateKeyPassphraseHandler subclass) that requests the passphrase for accessing the private key. For example: `<privateKeyPassphraseHandler>`, `<name>KeyFileHandler</name>`, `<options><password>test</password></options>`, `</privateKeyPassphraseHandler>`. +- invalidCertificateHandler (default: `ConsoleCertificateHandler`) – Class (a subclass of CertificateHandler) for verifying invalid certificates. For example: `<invalidCertificateHandler> <name>ConsoleCertificateHandler</name> </invalidCertificateHandler>`. +- disableProtocols (default: "") – Protocols that are not allowed to be used. +- preferServerCiphers (default: false) – Preferred server ciphers on the client.
**Example of settings:** @@ -1292,12 +1292,12 @@ Queries are logged in the [system.part_log](../../operations/system-tables/part_ Use the following parameters to configure logging: -- `database` – Name of the database. -- `table` – Name of the system table. -- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. -- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. -- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. -- `storage_policy` – Name of storage policy to use for the table (optional) +- `database` – Name of the database. +- `table` – Name of the system table. +- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. +- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `storage_policy` – Name of storage policy to use for the table (optional) **Example** @@ -1330,11 +1330,11 @@ Exposing metrics data for scraping from [Prometheus](https://prometheus.io). Settings: -- `endpoint` – HTTP endpoint for scraping metrics by prometheus server. Start from ‘/’. -- `port` – Port for `endpoint`. -- `metrics` – Flag that sets to expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table. -- `events` – Flag that sets to expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. -- `asynchronous_metrics` – Flag that sets to expose current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table. +- `endpoint` – HTTP endpoint for scraping metrics by prometheus server. It must start with ‘/’. +- `port` – Port for `endpoint`. +- `metrics` – Flag to expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table. +- `events` – Flag to expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table. +- `asynchronous_metrics` – Flag to expose current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table. **Example** @@ -1356,12 +1356,12 @@ Queries are logged in the [system.query_log](../../operations/system-tables/quer Use the following parameters to configure logging: -- `database` – Name of the database. -- `table` – Name of the system table the queries will be logged in. -- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. -- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined.
-- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. -- `storage_policy` – Name of storage policy to use for the table (optional) +- `database` – Name of the database. +- `table` – Name of the system table the queries will be logged in. +- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. +- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `storage_policy` – Name of storage policy to use for the table (optional) If the table does not exist, ClickHouse will create it. If the structure of the query log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically. @@ -1382,25 +1382,25 @@ If the table does not exist, ClickHouse will create it. If the structure of the The following settings are available: -- `max_size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB). -- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`. -- `max_entry_size`: The maximum size in bytes `SELECT` query results may have to be saved in the cache. Default value: `1048576` (1 MiB). -- `max_entry_rows`: The maximum number of rows `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil). +- `max_size_in_bytes`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB). +- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`. +- `max_entry_size_in_bytes`: The maximum size in bytes `SELECT` query results may have in order to be saved in the cache. Default value: `1048576` (1 MiB). +- `max_entry_size_in_rows`: The maximum number of rows `SELECT` query results may have in order to be saved in the cache. Default value: `30000000` (30 mil). Changed settings take effect immediately. :::note -Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `max_size` or disable the query cache altogether. +Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `max_size_in_bytes` or disable the query cache altogether. ::: **Example** ```xml
 <query_cache>
-    <max_size>1073741824</max_size>
+    <max_size_in_bytes>1073741824</max_size_in_bytes>
     <max_entries>1024</max_entries>
-    <max_entry_size>1048576</max_entry_size>
-    <max_entry_rows>30000000</max_entry_rows>
+    <max_entry_size_in_bytes>1048576</max_entry_size_in_bytes>
+    <max_entry_size_in_rows>30000000</max_entry_size_in_rows>
 </query_cache>
``` @@ -1412,12 +1412,12 @@ Queries are logged in the [system.query_thread_log](../../operations/system-tabl Use the following parameters to configure logging: -- `database` – Name of the database. -- `table` – Name of the system table the queries will be logged in. -- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. -- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. -- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table.
-- `storage_policy` – Name of storage policy to use for the table (optional) +- `database` – Name of the database. +- `table` – Name of the system table the queries will be logged in. +- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. +- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `storage_policy` – Name of storage policy to use for the table (optional) If the table does not exist, ClickHouse will create it. If the structure of the query thread log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically. @@ -1440,12 +1440,12 @@ Queries are logged in the [system.query_views_log](../../operations/system-table Use the following parameters to configure logging: -- `database` – Name of the database. -- `table` – Name of the system table the queries will be logged in. -- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. -- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. -- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. -- `storage_policy` – Name of storage policy to use for the table (optional) +- `database` – Name of the database. +- `table` – Name of the system table the queries will be logged in. +- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. +- `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `storage_policy` – Name of storage policy to use for the table (optional) If the table does not exist, ClickHouse will create it. If the structure of the query views log changed when the ClickHouse server was updated, the table with the old structure is renamed, and a new table is created automatically. @@ -1466,13 +1466,13 @@ Settings for the [text_log](../../operations/system-tables/text_log.md#system_ta Parameters: -- `level` — Maximum Message Level (by default `Trace`) which will be stored in a table. -- `database` — Database name. -- `table` — Table name. -- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. -- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. -- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. 
-- `storage_policy` – Name of storage policy to use for the table (optional) +- `level` — Maximum Message Level (by default `Trace`) which will be stored in a table. +- `database` — Database name. +- `table` — Table name. +- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. +- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `storage_policy` – Name of storage policy to use for the table (optional) **Example** ```xml @@ -1495,12 +1495,12 @@ Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_ Parameters: -- `database` — Database for storing a table. -- `table` — Table name. -- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. -- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` defined. -- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. -- `storage_policy` – Name of storage policy to use for the table (optional) +- `database` — Database for storing a table. +- `table` — Table name. +- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` defined. +- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `storage_policy` – Name of storage policy to use for the table (optional) The default server configuration file `config.xml` contains the following settings section: @@ -1557,7 +1557,7 @@ For the value of the `incl` attribute, see the section “[Configuration files]( **See Also** -- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) +- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) ## timezone {#server_configuration_parameters-timezone} @@ -1731,11 +1731,11 @@ Default value: `0`. **See also** -- [max_temporary_data_on_disk_size_for_user](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_user) -- [max_temporary_data_on_disk_size_for_query](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_query) -- [tmp_path](#tmp-path) -- [tmp_policy](#tmp-policy) -- [max_server_memory_usage](#max_server_memory_usage) +- [max_temporary_data_on_disk_size_for_user](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_user) +- [max_temporary_data_on_disk_size_for_query](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_query) +- [tmp_path](#tmp-path) +- [tmp_policy](#tmp-policy) +- [max_server_memory_usage](#max_server_memory_usage) ## uncompressed_cache_size {#server-settings-uncompressed_cache_size} @@ -1785,10 +1785,10 @@ The directory with user defined files. 
Used for SQL user defined functions [SQL Path to the file that contains: -- User configurations. -- Access rights. -- Settings profiles. -- Quota settings. +- User configurations. +- Access rights. +- Settings profiles. +- Quota settings. **Example** @@ -1804,7 +1804,7 @@ ClickHouse uses ZooKeeper for storing metadata of replicas when using replicated This section contains the following parameters: -- `node` — ZooKeeper endpoint. You can set multiple endpoints. +- `node` — ZooKeeper endpoint. You can set multiple endpoints. For example: @@ -1847,9 +1847,9 @@ This section contains the following parameters: **See Also** -- [Replication](../../engines/table-engines/mergetree-family/replication.md) -- [ZooKeeper Programmer’s Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html) -- [Optional secured communication between ClickHouse and Zookeeper](../ssl-zookeeper.md#secured-communication-with-zookeeper) +- [Replication](../../engines/table-engines/mergetree-family/replication.md) +- [ZooKeeper Programmer’s Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html) +- [Optional secured communication between ClickHouse and Zookeeper](../ssl-zookeeper.md#secured-communication-with-zookeeper) ## use_minimalistic_part_header_in_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper} @@ -1857,18 +1857,18 @@ Storage method for data part headers in ZooKeeper. This setting only applies to the `MergeTree` family. It can be specified: -- Globally in the [merge_tree](#server_configuration_parameters-merge_tree) section of the `config.xml` file. +- Globally in the [merge_tree](#server_configuration_parameters-merge_tree) section of the `config.xml` file. ClickHouse uses the setting for all the tables on the server. You can change the setting at any time. Existing tables change their behaviour when the setting changes. -- For each table. +- For each table. When creating a table, specify the corresponding [engine setting](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). The behaviour of an existing table with this setting does not change, even if the global setting changes. **Possible values** -- 0 — Functionality is turned off. -- 1 — Functionality is turned on. +- 0 — Functionality is turned off. +- 1 — Functionality is turned on. If `use_minimalistic_part_header_in_zookeeper = 1`, then [replicated](../../engines/table-engines/mergetree-family/replication.md) tables store the headers of the data parts compactly using a single `znode`. If the table contains many columns, this storage method significantly reduces the volume of the data stored in Zookeeper. @@ -1896,7 +1896,7 @@ The update is performed asynchronously, in a separate system thread. **See also** -- [background_schedule_pool_size](../../operations/settings/settings.md#background_schedule_pool_size) +- [background_schedule_pool_size](../../operations/settings/settings.md#background_schedule_pool_size) ## distributed_ddl {#server-settings-distributed_ddl} @@ -1954,9 +1954,9 @@ Default value: `/var/lib/clickhouse/access/`. ## user_directories {#user_directories} Section of the configuration file that contains settings: -- Path to configuration file with predefined users. -- Path to folder where users created by SQL commands are stored. -- ZooKeeper node path where users created by SQL commands are stored and replicated (experimental). +- Path to configuration file with predefined users. +- Path to folder where users created by SQL commands are stored. 
+- ZooKeeper node path where users created by SQL commands are stored and replicated (experimental). If this section is specified, the path from [users_config](../../operations/server-configuration-parameters/settings.md#users-config) and [access_control_path](../../operations/server-configuration-parameters/settings.md#access_control_path) won't be used. @@ -1991,8 +1991,8 @@ Users, roles, row policies, quotas, and profiles can be also stored in ZooKeeper You can also define sections `memory` — means storing information only in memory, without writing to disk, and `ldap` — means storing information on an LDAP server. To add an LDAP server as a remote user directory of users that are not defined locally, define a single `ldap` section with the following parameters: -- `server` — one of LDAP server names defined in `ldap_servers` config section. This parameter is mandatory and cannot be empty. -- `roles` — section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. If no roles are specified, user will not be able to perform any actions after authentication. If any of the listed roles is not defined locally at the time of authentication, the authentication attempt will fail as if the provided password was incorrect. +- `server` — one of LDAP server names defined in `ldap_servers` config section. This parameter is mandatory and cannot be empty. +- `roles` — section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server. If no roles are specified, the user will not be able to perform any actions after authentication. If any of the listed roles is not defined locally at the time of authentication, the authentication attempt will fail as if the provided password was incorrect. **Example** @@ -2012,7 +2012,7 @@ Sets the memory size (in bytes) for a stack trace at every peak allocation step. Possible values: -- Positive integer. +- Positive integer. Default value: `4194304`. @@ -2022,8 +2022,8 @@ Allows to collect random allocations and deallocations and writes them in the [s Possible values: -- Positive integer. -- 0 — Writing of random allocations and deallocations in the `system.trace_log` system table is disabled. +- Positive integer. +- 0 — Writing of random allocations and deallocations in the `system.trace_log` system table is disabled. Default value: `0`. @@ -2033,7 +2033,7 @@ Sets the cache size (in bytes) for mapped files. This setting allows to avoid fr Possible values: -- Positive integer. +- Positive integer. Default value: `1000`. @@ -2043,7 +2043,7 @@ Sets the cache size (in bytes) for [compiled expressions](../../operations/cache Possible values: -- Positive integer. +- Positive integer. Default value: `134217728`. @@ -2053,6 +2053,6 @@ Sets the cache size (in elements) for [compiled expressions](../../operations/ca Possible values: -- Positive integer. +- Positive integer. Default value: `10000`. diff --git a/docs/en/operations/settings/constraints-on-settings.md b/docs/en/operations/settings/constraints-on-settings.md index 83ef46053a4..1895a79cd3e 100644 --- a/docs/en/operations/settings/constraints-on-settings.md +++ b/docs/en/operations/settings/constraints-on-settings.md @@ -40,7 +40,7 @@ If the user tries to violate the constraints an exception is thrown and the sett A few types of constraints are supported: `min`, `max`, `readonly` (with alias `const`) and `changeable_in_readonly`.
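For instance, once a constraint is in place, an out-of-range `SET` fails immediately; a minimal sketch, assuming a profile whose `users.xml` bounds `max_memory_usage` between 5000000000 and 20000000000:

```sql
-- Assumed profile constraint: 5000000000 <= max_memory_usage <= 20000000000.
SET max_memory_usage = 20000000001; -- rejected: greater than the assumed <max> bound
SET max_memory_usage = 4999999999;  -- rejected: less than the assumed <min> bound
```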
The `min` and `max` constraints specify upper and lower boundaries for a numeric setting and can be used in combination. The `readonly` or `const` constraint specifies that the user cannot change the corresponding setting at all. The `changeable_in_readonly` constraint type allows the user to change the setting within the `min`/`max` range even if the `readonly` setting is set to 1; otherwise, settings are not allowed to be changed in `readonly=1` mode. Note that `changeable_in_readonly` is supported only if `settings_constraints_replace_previous` is enabled: ``` xml - <settings_constraints_replace_previous>true</settings_constraints_replace_previous> + <settings_constraints_replace_previous>true</settings_constraints_replace_previous> ``` diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index fae282c861f..eb1d5db5676 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -22,9 +22,9 @@ The order of priority for defining a setting is: 3. Query settings - - When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`. - - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`). - - Define settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of the SELECT query. The setting value is applied only to that query and is reset to the default or previous value after the query is executed. + - When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`. + - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`). + - Define settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of the SELECT query. The setting value is applied only to that query and is reset to the default or previous value after the query is executed. View the [Settings](./settings.md) page for a description of the ClickHouse settings. @@ -93,4 +93,4 @@ SELECT getSetting('custom_a'); **See Also** -- [Server Configuration Settings](../../operations/server-configuration-parameters/settings.md) +- [Server Configuration Settings](../../operations/server-configuration-parameters/settings.md) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 4008b71ef8b..0b1207ee7b6 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -13,7 +13,7 @@ If the number of broken parts in a single partition exceeds the `max_suspicious_ Possible values: -- Any positive integer. +- Any positive integer. Default value: 100. @@ -54,7 +54,7 @@ If the number of active parts in a single partition exceeds the `parts_to_throw_ Possible values: -- Any positive integer. +- Any positive integer. Default value: 300. @@ -69,7 +69,7 @@ If the number of active parts in a single partition exceeds the `parts_to_delay_ Possible values: -- Any positive integer. +- Any positive integer. Default value: 150. @@ -81,7 +81,7 @@ If the number of inactive parts in a single partition more than the `inactive_pa Possible values: -- Any positive integer. +- Any positive integer. Default value: 0 (unlimited). @@ -91,7 +91,7 @@ If the number of inactive parts in a single partition in the table at least that Possible values: -- Any positive integer. +- Any positive integer. Default value: 0 (unlimited). @@ -101,7 +101,7 @@ The value in seconds, which is used to calculate the `INSERT` delay, if the numb Possible values: -- Any positive integer. +- Any positive integer.
Default value: 1. @@ -127,7 +127,7 @@ If the total number of active parts in all partitions of a table exceeds the `ma Possible values: -- Any positive integer. +- Any positive integer. Default value: 100000. @@ -146,8 +146,8 @@ The number of most recently inserted blocks for which ClickHouse Keeper stores h Possible values: -- Any positive integer. -- 0 (disable deduplication) +- Any positive integer. +- 0 (disable deduplication) Default value: 100. @@ -161,8 +161,8 @@ The number of the most recently inserted blocks in the non-replicated [MergeTree Possible values: -- Any positive integer. -- 0 (disable deduplication). +- Any positive integer. +- 0 (disable deduplication). Default value: 0. @@ -174,7 +174,7 @@ The number of seconds after which the hash sums of the inserted blocks are remov Possible values: -- Any positive integer. +- Any positive integer. Default value: 604800 (1 week). @@ -188,8 +188,8 @@ The number of most recently async inserted blocks for which ClickHouse Keeper st Possible values: -- Any positive integer. -- 0 (disable deduplication for async_inserts) +- Any positive integer. +- 0 (disable deduplication for async_inserts) Default value: 10000. @@ -203,7 +203,7 @@ The number of seconds after which the hash sums of the async inserts are removed Possible values: -- Any positive integer. +- Any positive integer. Default value: 604800 (1 week). @@ -229,7 +229,7 @@ The minimum interval (in milliseconds) to update the `use_async_block_ids_cache` Possible values: -- Any positive integer. +- Any positive integer. Default value: 100. @@ -241,7 +241,7 @@ How many records may be in the ClickHouse Keeper log if there is inactive replic Possible values: -- Any positive integer. +- Any positive integer. Default value: 1000 @@ -251,7 +251,7 @@ Keep about this number of last records in ZooKeeper log, even if they are obsole Possible values: -- Any positive integer. +- Any positive integer. Default value: 10 @@ -261,7 +261,7 @@ If the time passed since a replication log (ClickHouse Keeper or ZooKeeper) entr Possible values: -- Any positive integer. +- Any positive integer. Default value: 3600 @@ -271,7 +271,7 @@ If the sum of the size of parts exceeds this threshold and the time since a repl Possible values: -- Any positive integer. +- Any positive integer. Default value: 10,737,418,240 @@ -281,7 +281,7 @@ When this setting has a value greater than zero, only a single replica starts th Possible values: -- Any positive integer. +- Any positive integer. Default value: 0 (seconds) @@ -295,7 +295,7 @@ Zero-copy replication is disabled by default in ClickHouse version 22.8 and high Possible values: -- Any positive integer. +- Any positive integer. Default value: 10800 @@ -305,7 +305,7 @@ Recompression works slow in most cases, so we don't start merge with recompressi Possible values: -- Any positive integer. +- Any positive integer. Default value: 7200 @@ -315,7 +315,7 @@ If true, this replica never merges parts and always downloads merged parts from Possible values: -- true, false +- true, false Default value: false @@ -325,7 +325,7 @@ Max broken parts, if more - deny automatic deletion. Possible values: -- Any positive integer. +- Any positive integer. Default value: 100 @@ -336,7 +336,7 @@ Max size of all broken parts, if more - deny automatic deletion. Possible values: -- Any positive integer. +- Any positive integer. 
Default value: 1,073,741,824 @@ -346,7 +346,7 @@ Do not apply ALTER if number of files for modification(deletion, addition) is gr Possible values: -- Any positive integer. +- Any positive integer. Default value: 75 @@ -356,7 +356,7 @@ Do not apply ALTER, if the number of files for deletion is greater than this set Possible values: -- Any positive integer. +- Any positive integer. Default value: 50 @@ -366,7 +366,7 @@ If the ratio of wrong parts to total number of parts is less than this - allow t Possible values: -- Float, 0.0 - 1.0 +- Float, 0.0 - 1.0 Default value: 0.5 @@ -376,7 +376,7 @@ Limit parallel fetches from endpoint (actually pool size). Possible values: -- Any positive integer. +- Any positive integer. Default value: 15 @@ -386,7 +386,7 @@ HTTP connection timeout for part fetch requests. Inherited from default profile Possible values: -- Any positive integer. +- Any positive integer. Default value: Inherited from default profile `http_connection_timeout` if not set explicitly. @@ -396,7 +396,7 @@ If true, replicated tables replicas on this node will try to acquire leadership. Possible values: -- true, false +- true, false Default value: true @@ -406,7 +406,7 @@ ZooKeeper session expiration check period, in seconds. Possible values: -- Any positive integer. +- Any positive integer. Default value: 60 @@ -416,7 +416,7 @@ Do not remove old local parts when repairing lost replica. Possible values: -- true, false +- true, false Default value: true @@ -426,8 +426,8 @@ HTTP connection timeout (in seconds) for part fetch requests. Inherited from def Possible values: -- Any positive integer. -- 0 - Use value of `http_connection_timeout`. +- Any positive integer. +- 0 - Use value of `http_connection_timeout`. Default value: 0. @@ -437,8 +437,8 @@ HTTP send timeout (in seconds) for part fetch requests. Inherited from default p Possible values: -- Any positive integer. -- 0 - Use value of `http_send_timeout`. +- Any positive integer. +- 0 - Use value of `http_send_timeout`. Default value: 0. @@ -448,8 +448,8 @@ HTTP receive timeout (in seconds) for fetch part requests. Inherited from defaul Possible values: -- Any positive integer. -- 0 - Use value of `http_receive_timeout`. +- Any positive integer. +- 0 - Use value of `http_receive_timeout`. Default value: 0. @@ -463,8 +463,8 @@ The setting isn't followed perfectly accurately. Possible values: -- Positive integer. -- 0 — Unlimited. +- Positive integer. +- 0 — Unlimited. Default value: `0`. @@ -482,8 +482,8 @@ The setting isn't followed perfectly accurately. Possible values: -- Positive integer. -- 0 — Unlimited. +- Positive integer. +- 0 — Unlimited. Default value: `0`. @@ -497,7 +497,7 @@ The time (in seconds) of storing inactive parts to protect against data loss dur Possible values: -- Any positive integer. +- Any positive integer. Default value: 480. @@ -520,7 +520,7 @@ The maximum total parts size (in bytes) to be merged into one part, if there are Possible values: -- Any positive integer. +- Any positive integer. Default value: 161061273600 (150 GB). @@ -534,7 +534,7 @@ The maximum total part size (in bytes) to be merged into one part, with the mini Possible values: -- Any positive integer. +- Any positive integer. Default value: 1048576 (1 MB) @@ -547,7 +547,7 @@ The number of rows that are read from the merged parts into memory. Possible values: -- Any positive integer. +- Any positive integer. 
Default value: 8192 @@ -560,7 +560,7 @@ This is to allow small merges to process - not filling the pool with long runnin Possible values: -- Any positive integer. +- Any positive integer. Default value: 8 @@ -571,7 +571,7 @@ This is to leave free threads for regular merges and avoid "Too many parts". Possible values: -- Any positive integer. +- Any positive integer. Default value: 20 @@ -585,7 +585,7 @@ The maximum number of threads that read parts when ClickHouse starts. Possible values: -- Any positive integer. +- Any positive integer. Default value: auto (number of CPU cores). @@ -599,7 +599,7 @@ The setting value specified when the table is created can be overridden via quer Possible values: -- Any positive integer. +- Any positive integer. Default value: -1 (unlimited). @@ -609,7 +609,7 @@ Merge parts if every part in the range is older than the value of `min_age_to_fo Possible values: -- Positive integer. +- Positive integer. Default value: 0 — Disabled. @@ -619,7 +619,7 @@ Whether `min_age_to_force_merge_seconds` should be applied only on the entire pa Possible values: -- true, false +- true, false Default value: false @@ -629,8 +629,8 @@ Enables to allow floating-point number as a partition key. Possible values: -- 0 — Floating-point partition key not allowed. -- 1 — Floating-point partition key allowed. +- 0 — Floating-point partition key not allowed. +- 1 — Floating-point partition key allowed. Default value: `0`. @@ -640,8 +640,8 @@ Enables the check at table creation, that the data type of a column for sampling Possible values: -- true — The check is enabled. -- false — The check is disabled at table creation. +- true — The check is enabled. +- false — The check is disabled at table creation. Default value: `true`. @@ -653,8 +653,8 @@ Sets minimal amount of bytes to enable balancing when distributing new big parts Possible values: -- Positive integer. -- 0 — Balancing is disabled. +- Positive integer. +- 0 — Balancing is disabled. Default value: `0`. @@ -670,8 +670,8 @@ The setting is applicable to `MergeTree` tables with enabled [data replication]( Possible values: -- 0 — Parts are removed. -- 1 — Parts are detached. +- 0 — Parts are removed. +- 1 — Parts are detached. Default value: `0`. @@ -681,7 +681,7 @@ Sets the interval in seconds for ClickHouse to execute the cleanup of old tempor Possible values: -- Any positive integer. +- Any positive integer. Default value: `60` seconds. @@ -691,7 +691,7 @@ Sets the interval in seconds for ClickHouse to execute the cleanup of old parts, Possible values: -- Any positive integer. +- Any positive integer. Default value: `1` second. @@ -701,8 +701,8 @@ Max number of concurrently executed queries related to the MergeTree table. Quer Possible values: -- Positive integer. -- 0 — No limit. +- Positive integer. +- 0 — No limit. Default value: `0` (no limit). @@ -718,8 +718,8 @@ The minimal number of marks read by the query for applying the [max_concurrent_q Possible values: -- Positive integer. -- 0 — Disabled (`max_concurrent_queries` limit applied to no queries). +- Positive integer. +- 0 — Disabled (`max_concurrent_queries` limit applied to no queries). Default value: `0` (limit never applied). @@ -831,3 +831,13 @@ You can see which parts of `s` were stored using the sparse serialization: │ s │ Sparse │ └────────┴────────────────────┘ ``` + +## clean_deleted_rows + +Enable/disable automatic deletion of rows flagged as `is_deleted` when performing `OPTIMIZE ... FINAL` on a table using the ReplacingMergeTree engine.
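When enabled, rows whose `is_deleted` flag is set are removed during the merge; a minimal sketch (the table, column names, and values here are illustrative):

```sql
-- Illustrative only: a version column and an is_deleted column are passed to ReplacingMergeTree.
CREATE TABLE example_cleanup
(
    key UInt64,
    value String,
    version UInt64,
    is_deleted UInt8
)
ENGINE = ReplacingMergeTree(version, is_deleted)
ORDER BY key
SETTINGS clean_deleted_rows = 'Always';

INSERT INTO example_cleanup VALUES (1, 'hello', 1, 0), (1, 'hello', 2, 1);

OPTIMIZE TABLE example_cleanup FINAL; -- the surviving row is flagged is_deleted = 1 and is cleaned up

SELECT count() FROM example_cleanup;  -- returns 0
```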
When disabled, the `CLEANUP` keyword has to be added to the `OPTIMIZE ... FINAL` to have the same behaviour. + +Possible values: + +- `Always` or `Never`. + +Default value: `Never` \ No newline at end of file diff --git a/docs/en/operations/settings/permissions-for-queries.md b/docs/en/operations/settings/permissions-for-queries.md index c565de9b21a..9e9c564d426 100644 --- a/docs/en/operations/settings/permissions-for-queries.md +++ b/docs/en/operations/settings/permissions-for-queries.md @@ -21,8 +21,8 @@ Restricts permissions for read data, write data, and change settings queries. When set to 1, allows: -- All types of read queries (like SELECT and equivalent queries). -- Queries that modify only session context (like USE). +- All types of read queries (like SELECT and equivalent queries). +- Queries that modify only session context (like USE). When set to 2, allows the above plus: - SET and CREATE TEMPORARY TABLE @@ -33,9 +33,9 @@ When set to 2, allows the above plus: Possible values: -- 0 — Read, Write, and Change settings queries are allowed. -- 1 — Only Read data queries are allowed. -- 2 — Read data and Change settings queries are allowed. +- 0 — Read, Write, and Change settings queries are allowed. +- 1 — Only Read data queries are allowed. +- 2 — Read data and Change settings queries are allowed. Default value: 0 @@ -54,8 +54,8 @@ Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) q Possible values: -- 0 — DDL queries are not allowed. -- 1 — DDL queries are allowed. +- 0 — DDL queries are not allowed. +- 1 — DDL queries are allowed. Default value: 1 diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 7a6b2340d29..163ed5d5826 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -26,7 +26,7 @@ It can take one of two values: `throw` or `break`. Restrictions on aggregation ( The maximum amount of RAM to use for running a query on a single server. -In the default configuration file, the maximum is 10 GB. +The default setting is unlimited (set to `0`). The setting does not consider the volume of available memory or the total volume of memory on the machine. The restriction applies to a single query within a single server. @@ -101,8 +101,8 @@ Enables or disables execution of `GROUP BY` clauses in external memory. See [GRO Possible values: -- Maximum volume of RAM (in bytes) that can be used by the single [GROUP BY](../../sql-reference/statements/select/group-by.md#select-group-by-clause) operation. -- 0 — `GROUP BY` in external memory disabled. +- Maximum volume of RAM (in bytes) that can be used by the single [GROUP BY](../../sql-reference/statements/select/group-by.md#select-group-by-clause) operation. +- 0 — `GROUP BY` in external memory disabled. Default value: 0. @@ -257,8 +257,8 @@ ClickHouse can proceed with different actions when the limit is reached. Use the Possible values: -- Positive integer. -- 0 — Unlimited number of rows. +- Positive integer. +- 0 — Unlimited number of rows. Default value: 0. @@ -274,8 +274,8 @@ ClickHouse can proceed with different actions when the limit is reached. Use [jo Possible values: -- Positive integer. -- 0 — Memory control is disabled. +- Positive integer. +- 0 — Memory control is disabled. Default value: 0. @@ -283,27 +283,27 @@ Default value: 0. 
Defines what action ClickHouse performs when any of the following join limits is reached: -- [max_bytes_in_join](#settings-max_bytes_in_join) -- [max_rows_in_join](#settings-max_rows_in_join) +- [max_bytes_in_join](#settings-max_bytes_in_join) +- [max_rows_in_join](#settings-max_rows_in_join) Possible values: -- `THROW` — ClickHouse throws an exception and breaks operation. -- `BREAK` — ClickHouse breaks operation and does not throw an exception. +- `THROW` — ClickHouse throws an exception and breaks operation. +- `BREAK` — ClickHouse breaks operation and does not throw an exception. Default value: `THROW`. **See Also** -- [JOIN clause](../../sql-reference/statements/select/join.md#select-join) -- [Join table engine](../../engines/table-engines/special/join.md) +- [JOIN clause](../../sql-reference/statements/select/join.md#select-join) +- [Join table engine](../../engines/table-engines/special/join.md) ## max_partitions_per_insert_block {#max-partitions-per-insert-block} Limits the maximum number of partitions in a single inserted block. -- Positive integer. -- 0 — Unlimited number of partitions. +- Positive integer. +- 0 — Unlimited number of partitions. Default value: 100. diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 91b67ee8238..ef4bbeeba89 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -24,8 +24,8 @@ Supported formats: Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -52,8 +52,8 @@ Supported formats: Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -72,8 +72,8 @@ Supported formats: Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -87,8 +87,8 @@ When this option is enabled, extended table metadata are sent from server to cli Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -103,8 +103,8 @@ For complex default expressions `input_format_defaults_for_omitted_fields` must Possible values: -- 0 — Inserting `NULL` into a not nullable column causes an exception. -- 1 — `NULL` fields are initialized with default column values. +- 0 — Inserting `NULL` into a not nullable column causes an exception. +- 1 — `NULL` fields are initialized with default column values. Default value: `1`. @@ -179,11 +179,11 @@ The setting does not apply to [date and time functions](../../sql-reference/func Possible values: -- `'best_effort'` — Enables extended parsing. +- `'best_effort'` — Enables extended parsing. ClickHouse can parse the basic `YYYY-MM-DD HH:MM:SS` format and all [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) date and time formats. For example, `'2018-06-08T01:02:03.000Z'`. -- `'basic'` — Use basic parser. +- `'basic'` — Use basic parser. ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`. @@ -191,8 +191,8 @@ Default value: `'basic'`. 
See also: -- [DateTime data type.](../../sql-reference/data-types/datetime.md) -- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) +- [DateTime data type.](../../sql-reference/data-types/datetime.md) +- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) ## date_time_output_format {#date_time_output_format} @@ -200,15 +200,15 @@ Allows choosing different output formats of the text representation of date and Possible values: -- `simple` - Simple output format. +- `simple` - Simple output format. ClickHouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone. -- `iso` - ISO output format. +- `iso` - ISO output format. ClickHouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC). -- `unix_timestamp` - Unix timestamp output format. +- `unix_timestamp` - Unix timestamp output format. ClickHouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`. @@ -216,8 +216,8 @@ Default value: `simple`. See also: -- [DateTime data type.](../../sql-reference/data-types/datetime.md) -- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) +- [DateTime data type.](../../sql-reference/data-types/datetime.md) +- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) ## input_format_ipv4_default_on_conversion_error {#input_format_ipv4_default_on_conversion_error} @@ -298,8 +298,8 @@ By default, when inserting data into a `Distributed` table with more than one sh Possible values: -- 0 — Insertion is rejected if there are multiple shards and no distributed key is given. -- 1 — Insertion is done randomly among all available shards when no distributed key is given. +- 0 — Insertion is rejected if there are multiple shards and no distributed key is given. +- 1 — Insertion is done randomly among all available shards when no distributed key is given. Default value: `0`. @@ -311,18 +311,18 @@ Enables or disables the insertion of JSON data with nested objects. Supported formats: -- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) +- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. See also: -- [Usage of Nested Structures](../../interfaces/formats.md/#jsoneachrow-nested) with the `JSONEachRow` format. +- [Usage of Nested Structures](../../interfaces/formats.md/#jsoneachrow-nested) with the `JSONEachRow` format. ## input_format_json_read_bools_as_numbers {#input_format_json_read_bools_as_numbers} @@ -373,8 +373,8 @@ Such integers are enclosed in quotes by default. This behavior is compatible wit Possible values: -- 0 — Integers are output without quotes. -- 1 — Integers are enclosed in quotes. +- 0 — Integers are output without quotes. +- 1 — Integers are enclosed in quotes. Default value: 1. @@ -390,8 +390,8 @@ Enables `+nan`, `-nan`, `+inf`, `-inf` outputs in [JSON](../../interfaces/format Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. @@ -526,8 +526,8 @@ Enables the ability to output all rows as a JSON array in the [JSONEachRow](../. 
Possible values: -- 1 — ClickHouse outputs all rows as an array, each row in the `JSONEachRow` format. -- 0 — ClickHouse outputs each row separately in the `JSONEachRow` format. +- 1 — ClickHouse outputs all rows as an array, each row in the `JSONEachRow` format. +- 0 — ClickHouse outputs each row separately in the `JSONEachRow` format. Default value: `0`. @@ -594,8 +594,8 @@ When enabled, always treat enum values as enum ids for TSV input format. It's re Possible values: -- 0 — Enum values are parsed as values or as enum IDs. -- 1 — Enum values are parsed only as enum IDs. +- 0 — Enum values are parsed as values or as enum IDs. +- 1 — Enum values are parsed only as enum IDs. Default value: 0. @@ -743,8 +743,8 @@ When enabled, always treat enum values as enum ids for CSV input format. It's re Possible values: -- 0 — Enum values are parsed as values or as enum IDs. -- 1 — Enum values are parsed only as enum IDs. +- 0 — Enum values are parsed as values or as enum IDs. +- 1 — Enum values are parsed only as enum IDs. Default value: 0. @@ -873,11 +873,11 @@ Enables or disables the full SQL parser if the fast stream parser can’t parse Possible values: -- 0 — Disabled. +- 0 — Disabled. In this case, you must provide formatted data. See the [Formats](../../interfaces/formats.md) section. -- 1 — Enabled. +- 1 — Enabled. In this case, you can use an SQL expression as a value, but data insertion is much slower this way. If you insert only formatted data, then ClickHouse behaves as if the setting value is 0. @@ -923,8 +923,8 @@ Enables or disables template deduction for SQL expressions in [Values](../../int Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -934,9 +934,9 @@ For the following query: INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (upper('Values')), ... ``` -- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=0`, expressions are interpreted separately for each row (this is very slow for large number of rows). -- If `input_format_values_interpret_expressions=0` and `format_values_deduce_templates_of_expressions=1`, expressions in the first, second and third rows are parsed using template `lower(String)` and interpreted together, expression in the forth row is parsed with another template (`upper(String)`). -- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=1`, the same as in previous case, but also allows fallback to interpreting expressions separately if it’s not possible to deduce template. +- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=0`, expressions are interpreted separately for each row (this is very slow for a large number of rows). +- If `input_format_values_interpret_expressions=0` and `format_values_deduce_templates_of_expressions=1`, expressions in the first, second and third rows are parsed using template `lower(String)` and interpreted together, while the expression in the fourth row is parsed with another template (`upper(String)`). +- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=1`, the same as in the previous case, but also allows fallback to interpreting expressions separately if it’s not possible to deduce a template.
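A sketch of the second case, assuming a table `test` with a single `String` column:

```sql
SET input_format_values_interpret_expressions = 0;
SET input_format_values_deduce_templates_of_expressions = 1;
-- The three lower(...) rows match one deduced template, lower(String), and are
-- evaluated together; upper('Values') is parsed with a second template.
INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (upper('Values'));
```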
### input_format_values_accurate_types_of_literals {#input_format_values_accurate_types_of_literals} @@ -950,11 +950,11 @@ This setting is used only when `input_format_values_deduce_templates_of_expressi Possible values: -- 0 — Disabled. +- 0 — Disabled. In this case, ClickHouse may use a more general type for some literals (e.g., `Float64` or `Int64` instead of `UInt64` for `42`), but it may cause overflow and precision issues. -- 1 — Enabled. +- 1 — Enabled. In this case, ClickHouse checks the actual type of literal and uses an expression template of the corresponding type. In some cases, it may significantly slow down expression evaluation in `Values`. @@ -968,8 +968,8 @@ Enables or disables the ability to insert the data into [Nested](../../sql-refer Possible values: -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. Default value: `0`. @@ -997,8 +997,8 @@ Allows to convert the [LowCardinality](../../sql-reference/data-types/lowcardina Possible values: -- 0 — The `LowCardinality` type is not converted to the `DICTIONARY` type. -- 1 — The `LowCardinality` type is converted to the `DICTIONARY` type. +- 0 — The `LowCardinality` type is not converted to the `DICTIONARY` type. +- 1 — The `LowCardinality` type is converted to the `DICTIONARY` type. Default value: `0`. @@ -1028,8 +1028,8 @@ Enables or disables the ability to insert the data into [Nested](../../sql-refer Possible values: -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. Default value: `0`. @@ -1077,8 +1077,8 @@ Enables or disables the ability to insert the data into [Nested](../../sql-refer Possible values: -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. Default value: `0`. @@ -1192,8 +1192,8 @@ Enables using fields that are not specified in [Avro](../../interfaces/formats.m Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. @@ -1211,9 +1211,9 @@ Type: string Possible values: -- `null` — No compression -- `deflate` — Compress with Deflate (zlib) -- `snappy` — Compress with [Snappy](https://google.github.io/snappy/) +- `null` — No compression +- `deflate` — Compress with Deflate (zlib) +- `snappy` — Compress with [Snappy](https://google.github.io/snappy/) Default value: `snappy` (if available) or `deflate`. @@ -1260,8 +1260,8 @@ Limits the width of value displayed in [Pretty](../../interfaces/formats.md/#pre Possible values: -- Positive integer. -- 0 — The value is cut completely. +- Positive integer. +- 0 — The value is cut completely. Default value: `10000` symbols. @@ -1336,8 +1336,8 @@ Adds row numbers to output in the [Pretty](../../interfaces/formats.md/#pretty) Possible values: -- 0 — Output without row numbers. -- 1 — Output with row numbers. +- 0 — Output without row numbers. +- 1 — Output with row numbers. 
Default value: `0`. @@ -1381,12 +1381,12 @@ Sets the field escaping rule for [CustomSeparated](../../interfaces/formats.md/# Possible values: -- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). -- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). -- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). -- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). -- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). -- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). +- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). +- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). +- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). +- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). +- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). +- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). Default value: `'Escaped'`. @@ -1434,12 +1434,12 @@ Field escaping rule. Possible values: -- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). -- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). -- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). -- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). -- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). -- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). +- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). +- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). +- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). +- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). +- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). +- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). Default value: `Raw`. @@ -1457,9 +1457,9 @@ Determines how to map ClickHouse `Enum` data type and [CapnProto](../../interfac Possible values: -- `'by_values'` — Values in enums should be the same, names can be different. -- `'by_names'` — Names in enums should be the same, values can be different. -- `'by_name_case_insensitive'` — Names in enums should be the same case-insensitive, values can be different. +- `'by_values'` — Values in enums should be the same, names can be different. +- `'by_names'` — Names in enums should be the same, values can be different. +- `'by_name_case_insensitive'` — Names in enums should be the same case-insensitive, values can be different. Default value: `'by_values'`. diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md index bafac853377..9f6fe87ae5f 100644 --- a/docs/en/operations/settings/settings-users.md +++ b/docs/en/operations/settings/settings-users.md @@ -47,13 +47,13 @@ Structure of the `users` section: Password can be specified in plaintext or in SHA256 (hex format). -- To assign a password in plaintext (**not recommended**), place it in a `password` element. 
+- To assign a password in plaintext (**not recommended**), place it in a `password` element. For example, `qwerty`. The password can be left blank. -- To assign a password using its SHA256 hash, place it in a `password_sha256_hex` element. +- To assign a password using its SHA256 hash, place it in a `password_sha256_hex` element. For example, `65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5`. @@ -65,7 +65,7 @@ Password can be specified in plaintext or in SHA256 (hex format). -- For compatibility with MySQL clients, password can be specified in double SHA1 hash. Place it in `password_double_sha1_hex` element. +- For compatibility with MySQL clients, password can be specified in double SHA1 hash. Place it in `password_double_sha1_hex` element. For example, `08b4a0f1de6ad37da17359e592c8d74788a83eb0`. @@ -81,8 +81,8 @@ This setting enables or disables using of SQL-driven [access control and account Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. @@ -92,17 +92,17 @@ List of networks from which the user can connect to the ClickHouse server. Each element of the list can have one of the following forms: -- `<ip>` — IP address or network mask. +- `<ip>` — IP address or network mask. Examples: `213.180.204.3`, `10.0.0.1/8`, `10.0.0.1/255.255.255.0`, `2a02:6b8::3`, `2a02:6b8::3/64`, `2a02:6b8::3/ffff:ffff:ffff:ffff::`. -- `<host>` — Hostname. +- `<host>` — Hostname. Example: `example01.host.ru`. To check access, a DNS query is performed, and all returned IP addresses are compared to the peer address. -- `<host_regexp>` — Regular expression for hostnames. +- `<host_regexp>` — Regular expression for hostnames. Example: `^example\d\d-\d\d-\d\.host\.ru$` diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index da503bc02aa..c6fdcf317c3 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -40,6 +40,39 @@ SETTINGS additional_table_filters = (('table_1', 'x != 2')) └───┴──────┘ ``` +## additional_result_filter + +An additional filter expression to apply to the result of a `SELECT` query. +This setting is not applied to any subquery. + +Default value: `''`. + +**Example** + +``` sql +insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +``` +```response +┌─x─┬─y────┐ +│ 1 │ a │ +│ 2 │ bb │ +│ 3 │ ccc │ +│ 4 │ dddd │ +└───┴──────┘ +``` +```sql +SELECT * +FROM table_1 +SETTINGS additional_result_filter = 'x != 2' +``` +```response +┌─x─┬─y────┐ +│ 1 │ a │ +│ 3 │ ccc │ +│ 4 │ dddd │ +└───┴──────┘ +``` + ## allow_nondeterministic_mutations {#allow_nondeterministic_mutations} User-level setting that allows mutations on replicated tables to make use of non-deterministic functions such as `dictGet`. @@ -71,17 +104,17 @@ ClickHouse applies this setting when the query contains the product of distribut Restrictions: -- Only applied for IN and JOIN subqueries. -- Only if the FROM section uses a distributed table containing more than one shard. -- If the subquery concerns a distributed table containing more than one shard. -- Not used for a table-valued [remote](../../sql-reference/table-functions/remote.md) function. +- Only applied for IN and JOIN subqueries. +- Only if the FROM section uses a distributed table containing more than one shard. +- If the subquery concerns a distributed table containing more than one shard. +- Not used for a table-valued [remote](../../sql-reference/table-functions/remote.md) function. Possible values: -- `deny` — Default value.
Prohibits using these types of subqueries (returns the “Double-distributed in/JOIN subqueries is denied” exception). -- `local` — Replaces the database and table in the subquery with local ones for the destination server (shard), leaving the normal `IN`/`JOIN.` -- `global` — Replaces the `IN`/`JOIN` query with `GLOBAL IN`/`GLOBAL JOIN.` -- `allow` — Allows the use of these types of subqueries. +- `deny` — Default value. Prohibits using these types of subqueries (returns the “Double-distributed in/JOIN subqueries is denied” exception). +- `local` — Replaces the database and table in the subquery with local ones for the destination server (shard), leaving the normal `IN`/`JOIN.` +- `global` — Replaces the `IN`/`JOIN` query with `GLOBAL IN`/`GLOBAL JOIN.` +- `allow` — Allows the use of these types of subqueries. ## prefer_global_in_and_join {#prefer-global-in-and-join} @@ -89,8 +122,8 @@ Enables the replacement of `IN`/`JOIN` operators with `GLOBAL IN`/`GLOBAL JOIN`. Possible values: -- 0 — Disabled. `IN`/`JOIN` operators are not replaced with `GLOBAL IN`/`GLOBAL JOIN`. -- 1 — Enabled. `IN`/`JOIN` operators are replaced with `GLOBAL IN`/`GLOBAL JOIN`. +- 0 — Disabled. `IN`/`JOIN` operators are not replaced with `GLOBAL IN`/`GLOBAL JOIN`. +- 1 — Enabled. `IN`/`JOIN` operators are replaced with `GLOBAL IN`/`GLOBAL JOIN`. Default value: `0`. @@ -104,7 +137,7 @@ Another use case of `prefer_global_in_and_join` is accessing tables created by **See also:** -- [Distributed subqueries](../../sql-reference/operators/in.md/#select-distributed-subqueries) for more information on how to use `GLOBAL IN`/`GLOBAL JOIN` +- [Distributed subqueries](../../sql-reference/operators/in.md/#select-distributed-subqueries) for more information on how to use `GLOBAL IN`/`GLOBAL JOIN` ## enable_optimize_predicate_expression {#enable-optimize-predicate-expression} @@ -114,8 +147,8 @@ Predicate pushdown may significantly reduce network traffic for distributed quer Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -162,8 +195,8 @@ Use data skipping indexes during query execution. Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 1. @@ -277,14 +310,14 @@ Sets the safety threshold for data volume generated by function [range](../../sq Possible values: -- Positive integer. +- Positive integer. Default value: `500,000,000`. **See Also** -- [max_block_size](#setting-max_block_size) -- [min_insert_block_size_rows](#min-insert-block-size-rows) +- [max_block_size](#setting-max_block_size) +- [min_insert_block_size_rows](#min-insert-block-size-rows) ## enable_http_compression {#settings-enable_http_compression} @@ -294,8 +327,8 @@ For more information, read the [HTTP interface description](../../interfaces/htt Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. @@ -315,8 +348,8 @@ For more information, read the [HTTP interface description](../../interfaces/htt Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: 0. @@ -326,7 +359,7 @@ Sets the maximum URI length of an HTTP request. Possible values: -- Positive integer. +- Positive integer. Default value: 1048576. @@ -336,7 +369,7 @@ Sets the maximum number of addresses generated from patterns for the [remote](.. Possible values: -- Positive integer. +- Positive integer. Default value: `1000`. 
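As an illustration of how such patterns expand (the host names below are hypothetical), the pattern `example01-{1,2}-{1,2}` generates 2 × 2 = 4 addresses, which must stay at or below this limit:

```sql
-- Sketch only: the hosts do not exist; the SETTINGS clause shows where the cap applies.
SELECT count()
FROM remote('example01-{1,2}-{1,2}', system.one)
SETTINGS table_function_remote_max_addresses = 100;
```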
@@ -346,7 +379,7 @@ Sets the maximum number of addresses generated from patterns for external storag

Possible values:

-- Positive integer.
+- Positive integer.

Default value: `1000`.

@@ -358,8 +391,8 @@ For more information, read the [HTTP interface description](../../interfaces/htt

Possible values:

-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Disabled.
+- 1 — Enabled.

Default value: 0.

@@ -369,8 +402,8 @@ Limits the maximum number of HTTP GET redirect hops for [URL](../../engines/tabl

Possible values:

-- Any positive integer number of hops.
-- 0 — No hops allowed.
+- Any positive integer number of hops.
+- 0 — No hops allowed.

Default value: 0.

@@ -383,8 +416,8 @@ This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements

Possible values:

-- 0 — Inserting `NULL` into a not nullable column causes an exception.
-- 1 — Default column value is inserted instead of `NULL`.
+- 0 — Inserting `NULL` into a non-nullable column causes an exception.
+- 1 — Default column value is inserted instead of `NULL`.

Default value: `1`.

@@ -394,10 +427,10 @@ Sets default strictness for [JOIN clauses](../../sql-reference/statements/select

Possible values:

-- `ALL` — If the right table has several matching rows, ClickHouse creates a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from matching rows. This is the normal `JOIN` behaviour from standard SQL.
-- `ANY` — If the right table has several matching rows, only the first one found is joined. If the right table has only one matching row, the results of `ANY` and `ALL` are the same.
-- `ASOF` — For joining sequences with an uncertain match.
-- `Empty string` — If `ALL` or `ANY` is not specified in the query, ClickHouse throws an exception.
+- `ALL` — If the right table has several matching rows, ClickHouse creates a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from matching rows. This is the normal `JOIN` behaviour from standard SQL.
+- `ANY` — If the right table has several matching rows, only the first one found is joined. If the right table has only one matching row, the results of `ANY` and `ALL` are the same.
+- `ASOF` — For joining sequences with an uncertain match.
+- `Empty string` — If `ALL` or `ANY` is not specified in the query, ClickHouse throws an exception.

Default value: `ALL`.

@@ -466,16 +499,16 @@ This setting applies only for `JOIN` operations with [Join](../../engines/table-

Possible values:

-- 0 — If the right table has more than one matching row, only the first one found is joined.
-- 1 — If the right table has more than one matching row, only the last one found is joined.
+- 0 — If the right table has more than one matching row, only the first one found is joined.
+- 1 — If the right table has more than one matching row, only the last one found is joined.

Default value: 0.

See also:

-- [JOIN clause](../../sql-reference/statements/select/join.md/#select-join)
-- [Join table engine](../../engines/table-engines/special/join.md)
-- [join_default_strictness](#settings-join_default_strictness)
+- [JOIN clause](../../sql-reference/statements/select/join.md/#select-join)
+- [Join table engine](../../engines/table-engines/special/join.md)
+- [join_default_strictness](#settings-join_default_strictness)

## join_use_nulls {#join_use_nulls}

@@ -483,8 +516,8 @@ Sets the type of [JOIN](../../sql-reference/statements/select/join.md) behaviour

Possible values:

-- 0 — The empty cells are filled with the default value of the corresponding field type.
-- 1 — `JOIN` behaves the same way as in standard SQL. The type of the corresponding field is converted to [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable), and empty cells are filled with [NULL](../../sql-reference/syntax.md).
+- 0 — The empty cells are filled with the default value of the corresponding field type.
+- 1 — `JOIN` behaves the same way as in standard SQL. The type of the corresponding field is converted to [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable), and empty cells are filled with [NULL](../../sql-reference/syntax.md).

Default value: 0.

@@ -496,14 +529,14 @@ Columns for these keys are filled with either default value or `NULL` in corresp

Possible values:

-- 0 — The default value for the aggregation key type is used to produce missing values.
-- 1 — ClickHouse executes `GROUP BY` the same way as the SQL standard says. The types of aggregation keys are converted to [Nullable](/docs/en/sql-reference/data-types/nullable.md/#data_type-nullable). Columns for corresponding aggregation keys are filled with [NULL](/docs/en/sql-reference/syntax.md) for rows that didn't use it.
+- 0 — The default value for the aggregation key type is used to produce missing values.
+- 1 — ClickHouse executes `GROUP BY` the same way as the SQL standard says. The types of aggregation keys are converted to [Nullable](/docs/en/sql-reference/data-types/nullable.md/#data_type-nullable). Columns for corresponding aggregation keys are filled with [NULL](/docs/en/sql-reference/syntax.md) for rows that didn't use it.

Default value: 0.

See also:

-- [GROUP BY clause](/docs/en/sql-reference/statements/select/group-by.md)
+- [GROUP BY clause](/docs/en/sql-reference/statements/select/group-by.md)

## partial_merge_join_optimizations {#partial_merge_join_optimizations}

@@ -513,8 +546,8 @@ By default, this setting enables improvements that could lead to wrong results.

Possible values:

-- 0 — Optimizations disabled.
-- 1 — Optimizations enabled.
+- 0 — Optimizations disabled.
+- 1 — Optimizations enabled.

Default value: 1.

@@ -530,7 +563,7 @@ ClickHouse server:

Possible values:

-- Any positive integer. Recommended range of values: \[1000, 100000\].
+- Any positive integer. Recommended range of values: \[1000, 100000\].

Default value: 65536.

@@ -542,7 +575,7 @@ The bigger the value of the setting, the more RAM is used and the less disk I/O

Possible values:

-- Any positive integer, starting from 2.
+- Any positive integer, starting from 2.

Default value: 64.

@@ -556,24 +589,24 @@ Use this setting only for backward compatibility if your use cases depend on leg

When the legacy behaviour is enabled:

-- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping.
-- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do.
+- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are not equal because ClickHouse uses the logic with many-to-one left-to-right table keys mapping.
+- Results of `ANY INNER JOIN` operations contain all rows from the left table like the `SEMI LEFT JOIN` operations do.

When the legacy behaviour is disabled:

-- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations.
-- Results of `ANY INNER JOIN` operations contain one row per key from both the left and right tables.
+- Results of `t1 ANY LEFT JOIN t2` and `t2 ANY RIGHT JOIN t1` operations are equal because ClickHouse uses the logic which provides one-to-many keys mapping in `ANY RIGHT JOIN` operations.
+- Results of `ANY INNER JOIN` operations contain one row per key from both the left and right tables.

Possible values:

-- 0 — Legacy behaviour is disabled.
-- 1 — Legacy behaviour is enabled.
+- 0 — Legacy behaviour is disabled.
+- 1 — Legacy behaviour is enabled.

Default value: 0.

See also:

-- [JOIN strictness](../../sql-reference/statements/select/join.md/#join-settings)
+- [JOIN strictness](../../sql-reference/statements/select/join.md/#join-settings)

## temporary_files_codec {#temporary_files_codec}

@@ -581,8 +614,8 @@ Sets compression codec for temporary files used in sorting and joining operation

Possible values:

-- LZ4 — [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression is applied.
-- NONE — No compression is applied.
+- LZ4 — [LZ4](https://en.wikipedia.org/wiki/LZ4_(compression_algorithm)) compression is applied.
+- NONE — No compression is applied.

Default value: LZ4.

@@ -606,7 +639,7 @@ If the number of rows to be read from a file of a [MergeTree](../../engines/tabl

Possible values:

-- Positive integer.
+- Positive integer.

Default value: `163840`.

@@ -616,7 +649,7 @@ The minimum number of lines to read from one file before the [MergeTree](../../e

Possible values:

-- Positive integer.
+- Positive integer.

Default value: `163840`.

@@ -626,7 +659,7 @@ If the number of bytes to read from one file of a [MergeTree](../../engines/tabl

Possible value:

-- Positive integer.
+- Positive integer.

Default value: `251658240`.

@@ -636,7 +669,7 @@ The minimum number of bytes to read from one file before [MergeTree](../../engin

Possible values:

-- Positive integer.
+- Positive integer.

Default value: `251658240`.

@@ -646,7 +679,7 @@ If the distance between two data blocks to be read in one file is less than `mer

Possible values:

-- Any positive integer.
+- Any positive integer.

Default value: 0.

@@ -656,7 +689,7 @@ If the distance between two data blocks to be read in one file is less than `mer

Possible values:

-- Any positive integer.
+- Any positive integer.

Default value: 0.

@@ -666,7 +699,7 @@ When searching for data, ClickHouse checks the data marks in the index file. If

Possible values:

-- Any positive even integer.
+- Any positive even integer.

Default value: 8.

@@ -678,7 +711,7 @@ The cache of uncompressed blocks stores data extracted for queries. ClickHouse u

Possible values:

-- Any positive integer.
+- Any positive integer.

Default value: 128 ✕ 8192.

@@ -690,7 +723,7 @@ The cache of uncompressed blocks stores data extracted for queries. ClickHouse u

Possible values:

-- Any positive integer.
+- Any positive integer.

Default value: 2013265920.

@@ -702,8 +735,8 @@ ClickHouse uses this setting when reading data from tables. If the total storage

Possible values:

-- 0 — Direct I/O is disabled.
-- Positive integer.
+- 0 — Direct I/O is disabled.
+- Positive integer.

Default value: 0.

@@ -713,14 +746,14 @@ Sets the method of data compression that is used for communication between serve

Possible values:

-- `LZ4` — sets LZ4 compression method.
-- `ZSTD` — sets ZSTD compression method.
+- `LZ4` — sets LZ4 compression method.
+- `ZSTD` — sets ZSTD compression method.

Default value: `LZ4`.
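+
+As a minimal sketch, a session can be switched to ZSTD for server-to-server traffic (the level below is only an illustration):
+
+```sql
+SET network_compression_method = 'ZSTD';
+-- Applies only while network_compression_method is ZSTD (levels 1 to 15).
+SET network_zstd_compression_level = 3;
+```
+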
**See Also**

-- [network_zstd_compression_level](#network_zstd_compression_level)
+- [network_zstd_compression_level](#network_zstd_compression_level)

## network_zstd_compression_level {#network_zstd_compression_level}

@@ -728,7 +761,7 @@ Adjusts the level of ZSTD compression. Used only when [network_compression_metho

Possible values:

-- Positive integer from 1 to 15.
+- Positive integer from 1 to 15.

Default value: `1`.

@@ -756,8 +789,8 @@ Only the queries with the following type will get to the log:

- `QUERY_FINISH`
- `EXCEPTION_WHILE_PROCESSING`

-- Type: milliseconds
-- Default value: 0 (any query)
+- Type: milliseconds
+- Default value: 0 (any query)

## log_queries_min_type {#settings-log-queries-min-type}

@@ -785,8 +818,8 @@ Query threads log into the [system.query_thread_log](../../operations/system-tab

Possible values:

-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Disabled.
+- 1 — Enabled.

Default value: `1`.

@@ -814,8 +847,8 @@ Allows to log formatted queries to the [system.query_log](../../operations/syste

Possible values:

-- 0 — Formatted queries are not logged in the system table.
-- 1 — Formatted queries are logged in the system table.
+- 0 — Formatted queries are not logged in the system table.
+- 1 — Formatted queries are logged in the system table.

Default value: `0`.

@@ -827,7 +860,7 @@ It can be used to improve the readability of server logs. Additionally, it helps

Possible values:

-- Any string no longer than [max_query_size](#settings-max_query_size). If the max_query_size is exceeded, the server throws an exception.
+- Any string no longer than [max_query_size](#settings-max_query_size). If the max_query_size is exceeded, the server throws an exception.

Default value: empty string.

@@ -857,8 +890,8 @@ Write time that processor spent during execution/waiting for data to `system.pro

See also:

-- [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md#system-processors_profile_log)
-- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)
+- [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md)
+- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)

## max_insert_block_size {#settings-max_insert_block_size}

@@ -878,8 +911,8 @@ Sets the minimum number of rows in the block that can be inserted into a table b

Possible values:

-- Positive integer.
-- 0 — Squashing disabled.
+- Positive integer.
+- 0 — Squashing disabled.

Default value: 1048576.

@@ -889,8 +922,8 @@ Sets the minimum number of bytes in the block which can be inserted into a table

Possible values:

-- Positive integer.
-- 0 — Squashing disabled.
+- Positive integer.
+- 0 — Squashing disabled.

Default value: 268435456.

@@ -902,8 +935,8 @@ Sets the time in seconds. If a replica's lag is greater than or equal to the set

Possible values:

-- Positive integer.
-- 0 — Replica lags are not checked.
+- Positive integer.
+- 0 — Replica lags are not checked.

To prevent the use of any replica with a non-zero lag, set this parameter to 1.

@@ -930,8 +963,8 @@ The maximum number of threads to execute the `INSERT SELECT` query.

Possible values:

-- 0 (or 1) — `INSERT SELECT` no parallel execution.
-- Positive integer. Bigger than 1.
+- 0 (or 1) — `INSERT SELECT` no parallel execution.
+- Positive integer. Bigger than 1.

Default value: 0.

@@ -977,8 +1010,8 @@ Limits maximum recursion depth in the recursive descent parser. Allows controlli

Possible values:

-- Positive integer.
-- 0 — Recursion depth is unlimited.
+- Positive integer.
+- 0 — Recursion depth is unlimited.

Default value: 1000.

@@ -994,7 +1027,7 @@ Timeout to close idle TCP connections after specified number of seconds.

Possible values:

-- Positive integer (0 - close immediatly, after 0 seconds).
+- Positive integer (0 - close immediately, after 0 seconds).

Default value: 3600.

@@ -1038,8 +1071,8 @@ If the value is exceeded, the server throws an exception.

Possible values:

-- Positive integer.
-- 0 — Unlimited depth.
+- Positive integer.
+- 0 — Unlimited depth.

Default value: `5`.

@@ -1051,8 +1084,8 @@ The setting isn't followed perfectly accurately.

Possible values:

-- Positive integer.
-- 0 — Unlimited.
+- Positive integer.
+- 0 — Unlimited.

Default value: `0`.

@@ -1072,8 +1105,8 @@ The setting isn't followed perfectly accurately.

Possible values:

-- Positive integer.
-- 0 — Unlimited.
+- Positive integer.
+- 0 — Unlimited.

Default value: `0`.

@@ -1090,7 +1123,7 @@ Could be used for throttling speed when replicating the data to add or replace n

The timeout in milliseconds for connecting to a remote server for a Distributed table engine, if the ‘shard’ and ‘replica’ sections are used in the cluster definition.
If unsuccessful, several attempts are made to connect to various replicas.

-Default value: 50.
+Default value: 1000.

## connection_pool_max_wait_ms {#connection-pool-max-wait-ms}

@@ -1184,15 +1217,15 @@ Specifies the algorithm of replicas selection that is used for distributed query

ClickHouse supports the following algorithms of choosing replicas:

-- [Random](#load_balancing-random) (by default)
-- [Nearest hostname](#load_balancing-nearest_hostname)
-- [In order](#load_balancing-in_order)
-- [First or random](#load_balancing-first_or_random)
-- [Round robin](#load_balancing-round_robin)
+- [Random](#load_balancing-random) (by default)
+- [Nearest hostname](#load_balancing-nearest_hostname)
+- [In order](#load_balancing-in_order)
+- [First or random](#load_balancing-first_or_random)
+- [Round robin](#load_balancing-round_robin)

See also:

-- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors)
+- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors)

### Random (by Default) {#load_balancing-random}

@@ -1252,8 +1285,8 @@ Enables/disables preferable using the localhost replica when processing distribu

Possible values:

-- 1 — ClickHouse always sends a query to the localhost replica if it exists.
-- 0 — ClickHouse uses the balancing strategy specified by the [load_balancing](#settings-load_balancing) setting.
+- 1 — ClickHouse always sends a query to the localhost replica if it exists.
+- 0 — ClickHouse uses the balancing strategy specified by the [load_balancing](#settings-load_balancing) setting.

Default value: 1.

@@ -1279,7 +1312,7 @@ The maximum number of replicas for each shard when executing a query.

Possible values:

-- Positive integer.
+- Positive integer.

Default value: `1`.

@@ -1322,8 +1355,8 @@ How to use `parallel_replicas_custom_key` expression for splitting work between

Possible values:

-- `default` — Use the default implementation using modulo operation on the `parallel_replicas_custom_key`.
-- `range` — Split the entire value space of the expression in the ranges. This type of filtering is useful if values of `parallel_replicas_custom_key` are uniformly spread across the entire integer space, e.g. hash values.
+- `default` — Use the default implementation using modulo operation on the `parallel_replicas_custom_key`.
+- `range` — Split the entire value space of the expression in the ranges. This type of filtering is useful if values of `parallel_replicas_custom_key` are uniformly spread across the entire integer space, e.g. hash values.

Default value: `default`.

@@ -1350,14 +1383,14 @@ Enables or disables JIT-compilation of aggregate functions to native code. Enabl

Possible values:

-- 0 — Aggregation is done without JIT compilation.
-- 1 — Aggregation is done using JIT compilation.
+- 0 — Aggregation is done without JIT compilation.
+- 1 — Aggregation is done using JIT compilation.

Default value: `1`.

**See Also**

-- [min_count_to_compile_aggregate_expression](#min_count_to_compile_aggregate_expression)
+- [min_count_to_compile_aggregate_expression](#min_count_to_compile_aggregate_expression)

## min_count_to_compile_aggregate_expression {#min_count_to_compile_aggregate_expression}

@@ -1365,8 +1398,8 @@ The minimum number of identical aggregate expressions to start JIT-compilation.

Possible values:

-- Positive integer.
-- 0 — Identical aggregate expressions are always JIT-compiled.
+- Positive integer.
+- 0 — Identical aggregate expressions are always JIT-compiled.

Default value: `3`.

@@ -1479,13 +1512,33 @@ Possible values:

Default value: `0`.

+## query_cache_max_size_in_bytes {#query-cache-max-size-in-bytes}
+
+The maximum amount of memory (in bytes) the current user may allocate in the query cache. 0 means unlimited.
+
+Possible values:
+
+- Non-negative integer.
+
+Default value: 0 (no restriction).
+
+## query_cache_max_entries {#query-cache-max-entries}
+
+The maximum number of query results the current user may store in the query cache. 0 means unlimited.
+
+Possible values:
+
+- Non-negative integer.
+
+Default value: 0 (no restriction).
+
## insert_quorum {#settings-insert_quorum}

Enables the quorum writes.

-- If `insert_quorum < 2`, the quorum writes are disabled.
-- If `insert_quorum >= 2`, the quorum writes are enabled.
-- If `insert_quorum = 'auto'`, use majority number (`number_of_replicas / 2 + 1`) as quorum number.
+- If `insert_quorum < 2`, the quorum writes are disabled.
+- If `insert_quorum >= 2`, the quorum writes are enabled.
+- If `insert_quorum = 'auto'`, use majority number (`number_of_replicas / 2 + 1`) as quorum number.

Default value: 0 - disabled.

@@ -1497,14 +1550,14 @@ When `insert_quorum_parallel` is disabled, all replicas in the quorum are consis

ClickHouse generates an exception:

-- If the number of available replicas at the time of the query is less than the `insert_quorum`.
-- When `insert_quorum_parallel` is disabled and an attempt to write data is made when the previous block has not yet been inserted in `insert_quorum` of replicas. This situation may occur if the user tries to perform another `INSERT` query to the same table before the previous one with `insert_quorum` is completed.
+- If the number of available replicas at the time of the query is less than the `insert_quorum`.
+- When `insert_quorum_parallel` is disabled and an attempt to write data is made when the previous block has not yet been inserted in `insert_quorum` of replicas. This situation may occur if the user tries to perform another `INSERT` query to the same table before the previous one with `insert_quorum` is completed.
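+
+As a minimal sketch (the table name below is illustrative), quorum writes can be enabled per session:
+
+```sql
+-- Require acknowledgement from at least two replicas before the INSERT returns.
+SET insert_quorum = 2;
+INSERT INTO test_replicated VALUES (1);
+```
+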
See also:

-- [insert_quorum_timeout](#settings-insert_quorum_timeout)
-- [insert_quorum_parallel](#settings-insert_quorum_parallel)
-- [select_sequential_consistency](#settings-select_sequential_consistency)
+- [insert_quorum_timeout](#settings-insert_quorum_timeout)
+- [insert_quorum_parallel](#settings-insert_quorum_parallel)
+- [select_sequential_consistency](#settings-select_sequential_consistency)

## insert_quorum_timeout {#settings-insert_quorum_timeout}

@@ -1514,9 +1567,9 @@ Default value: 600 000 milliseconds (ten minutes).

See also:

-- [insert_quorum](#settings-insert_quorum)
-- [insert_quorum_parallel](#settings-insert_quorum_parallel)
-- [select_sequential_consistency](#settings-select_sequential_consistency)
+- [insert_quorum](#settings-insert_quorum)
+- [insert_quorum_parallel](#settings-insert_quorum_parallel)
+- [select_sequential_consistency](#settings-select_sequential_consistency)

## insert_quorum_parallel {#settings-insert_quorum_parallel}

@@ -1524,16 +1577,16 @@ Enables or disables parallelism for quorum `INSERT` queries. If enabled, additio

Possible values:

-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Disabled.
+- 1 — Enabled.

Default value: 1.

See also:

-- [insert_quorum](#settings-insert_quorum)
-- [insert_quorum_timeout](#settings-insert_quorum_timeout)
-- [select_sequential_consistency](#settings-select_sequential_consistency)
+- [insert_quorum](#settings-insert_quorum)
+- [insert_quorum_timeout](#settings-insert_quorum_timeout)
+- [select_sequential_consistency](#settings-select_sequential_consistency)

## select_sequential_consistency {#settings-select_sequential_consistency}

@@ -1541,8 +1594,8 @@ Enables or disables sequential consistency for `SELECT` queries. Requires `inser

Possible values:

-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Disabled.
+- 1 — Enabled.

Default value: 0.

@@ -1554,9 +1607,9 @@ When `insert_quorum_parallel` is enabled (the default), then `select_sequential_

See also:

-- [insert_quorum](#settings-insert_quorum)
-- [insert_quorum_timeout](#settings-insert_quorum_timeout)
-- [insert_quorum_parallel](#settings-insert_quorum_parallel)
+- [insert_quorum](#settings-insert_quorum)
+- [insert_quorum_timeout](#settings-insert_quorum_timeout)
+- [insert_quorum_parallel](#settings-insert_quorum_parallel)

## insert_deduplicate {#settings-insert-deduplicate}

@@ -1564,8 +1617,8 @@ Enables or disables block deduplication of `INSERT` (for Replicated\* tables).

Possible values:

-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Disabled.
+- 1 — Enabled.

Default value: 1.

@@ -1587,8 +1640,8 @@ If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client will

Possible values:

-- 0 — Insertions are made synchronously, one after another.
-- 1 — Multiple asynchronous insertions enabled.
+- 0 — Insertions are made synchronously, one after another.
+- 1 — Multiple asynchronous insertions enabled.

Default value: `0`.

@@ -1598,8 +1651,8 @@ The maximum number of threads for background data parsing and insertion.

Possible values:

-- Positive integer.
-- 0 — Asynchronous insertions are disabled.
+- Positive integer.
+- 0 — Asynchronous insertions are disabled.

Default value: `16`.

@@ -1609,8 +1662,8 @@ Enables or disables waiting for processing of asynchronous insertion. If enabled

Possible values:

-- 0 — Server returns `OK` even if the data is not yet inserted.
-- 1 — Server returns `OK` only after the data is inserted.
+- 0 — Server returns `OK` even if the data is not yet inserted.
+- 1 — Server returns `OK` only after the data is inserted.

Default value: `1`.
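+
+Taken together, a client that wants server-side batching but still a durable acknowledgement can combine the two settings. A minimal sketch (assuming the companion buffering setting `async_insert`; the table name is illustrative):
+
+```sql
+SET async_insert = 1;          -- buffer small INSERTs on the server
+SET wait_for_async_insert = 1; -- return OK only after the buffered block is flushed
+INSERT INTO t_events VALUES (now(), 'click');
+```
+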
@@ -1620,8 +1673,8 @@ The timeout in seconds for waiting for processing of asynchronous insertion.

Possible values:

-- Positive integer.
-- 0 — Disabled.
+- Positive integer.
+- 0 — Disabled.

Default value: [lock_acquire_timeout](#lock_acquire_timeout).

@@ -1631,8 +1684,8 @@ The maximum size of the unparsed data in bytes collected per query before being

Possible values:

-- Positive integer.
-- 0 — Asynchronous insertions are disabled.
+- Positive integer.
+- 0 — Asynchronous insertions are disabled.

Default value: `100000`.

@@ -1642,8 +1695,8 @@ The maximum number of insert queries per block before being inserted. This setti

Possible values:

-- Positive integer.
-- 0 — Asynchronous insertions are disabled.
+- Positive integer.
+- 0 — Asynchronous insertions are disabled.

Default value: `450`.

@@ -1653,8 +1706,8 @@ The maximum timeout in milliseconds since the first `INSERT` query before insert

Possible values:

-- Positive integer.
-- 0 — Timeout disabled.
+- Positive integer.
+- 0 — Timeout disabled.

Default value: `200`.

@@ -1664,8 +1717,8 @@ The maximum timeout in milliseconds since the last `INSERT` query before dumping

Possible values:

-- Positive integer.
-- 0 — Timeout disabled.
+- Positive integer.
+- 0 — Timeout disabled.

Default value: `0`.

@@ -1675,8 +1728,8 @@ Enables or disables insert deduplication of `ASYNC INSERT` (for Replicated\* tab

Possible values:

-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Disabled.
+- 1 — Enabled.

Default value: 1.

@@ -1711,7 +1764,7 @@ user can avoid the same inserted data being deduplicated.

Possible values:

-- Any string
+- Any string

Default value: empty string (disabled)

@@ -1754,8 +1807,8 @@ The setting sets the maximum number of retries for ClickHouse Keeper (or ZooKeep

Possible values:

-- Positive integer.
-- 0 — Retries are disabled
+- Positive integer.
+- 0 — Retries are disabled

Default value: 0

@@ -1775,8 +1828,8 @@ Initial timeout(in milliseconds) to retry a failed Keeper request during INSERT

Possible values:

-- Positive integer.
-- 0 — No timeout
+- Positive integer.
+- 0 — No timeout

Default value: 100

@@ -1786,8 +1839,8 @@ Maximum timeout (in milliseconds) to retry a failed Keeper request during INSERT

Possible values:

-- Positive integer.
-- 0 — Maximum timeout is not limited
+- Positive integer.
+- 0 — Maximum timeout is not limited

Default value: 10000

@@ -1797,8 +1850,8 @@ Limits the data volume (in bytes) that is received or transmitted over the netwo

Possible values:

-- Positive integer.
-- 0 — Data volume control is disabled.
+- Positive integer.
+- 0 — Data volume control is disabled.

Default value: 0.

@@ -1808,8 +1861,8 @@ Limits the speed of the data exchange over the network in bytes per second. This

Possible values:

-- Positive integer.
-- 0 — Bandwidth control is disabled.
+- Positive integer.
+- 0 — Bandwidth control is disabled.

Default value: 0.

@@ -1819,8 +1872,8 @@ Limits the speed of the data exchange over the network in bytes per second. This

Possible values:

-- Positive integer.
-- 0 — Control of the data speed is disabled.
+- Positive integer.
+- 0 — Control of the data speed is disabled.

Default value: 0.

@@ -1830,8 +1883,8 @@ Limits the speed that data is exchanged at over the network in bytes per second.

Possible values:

-- Positive integer.
-- 0 — Control of the data speed is disabled.
+- Positive integer.
+- 0 — Control of the data speed is disabled.

Default value: 0.
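+
+As an illustration, the per-query limits described above can be attached to a single heavy query. A sketch assuming the current setting names `max_network_bandwidth` and `max_network_bytes` (the table is hypothetical):
+
+```sql
+-- Cap this export at roughly 10 MiB/s and 10 GiB of total network traffic.
+SELECT * FROM big_table
+SETTINGS max_network_bandwidth = 10485760, max_network_bytes = 10737418240
+FORMAT CSV;
+```
+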
@@ -1841,11 +1894,11 @@ Specifies which of the `uniq*` functions should be used to perform the [COUNT(DI

Possible values:

-- [uniq](../../sql-reference/aggregate-functions/reference/uniq.md/#agg_function-uniq)
-- [uniqCombined](../../sql-reference/aggregate-functions/reference/uniqcombined.md/#agg_function-uniqcombined)
-- [uniqCombined64](../../sql-reference/aggregate-functions/reference/uniqcombined64.md/#agg_function-uniqcombined64)
-- [uniqHLL12](../../sql-reference/aggregate-functions/reference/uniqhll12.md/#agg_function-uniqhll12)
-- [uniqExact](../../sql-reference/aggregate-functions/reference/uniqexact.md/#agg_function-uniqexact)
+- [uniq](../../sql-reference/aggregate-functions/reference/uniq.md/#agg_function-uniq)
+- [uniqCombined](../../sql-reference/aggregate-functions/reference/uniqcombined.md/#agg_function-uniqcombined)
+- [uniqCombined64](../../sql-reference/aggregate-functions/reference/uniqcombined64.md/#agg_function-uniqcombined64)
+- [uniqHLL12](../../sql-reference/aggregate-functions/reference/uniqhll12.md/#agg_function-uniqhll12)
+- [uniqExact](../../sql-reference/aggregate-functions/reference/uniqexact.md/#agg_function-uniqexact)

Default value: `uniqExact`.

@@ -1855,25 +1908,25 @@ Enables or disables silently skipping of unavailable shards.

Shard is considered unavailable if all its replicas are unavailable. A replica is unavailable in the following cases:

-- ClickHouse can’t connect to replica for any reason.
+- ClickHouse can’t connect to replica for any reason.

    When connecting to a replica, ClickHouse performs several attempts. If all these attempts fail, the replica is considered unavailable.

-- Replica can’t be resolved through DNS.
+- Replica can’t be resolved through DNS.

    If replica’s hostname can’t be resolved through DNS, it can indicate the following situations:

-    - Replica’s host has no DNS record. It can occur in systems with dynamic DNS, for example, [Kubernetes](https://kubernetes.io), where nodes can be unresolvable during downtime, and this is not an error.
+    - Replica’s host has no DNS record. It can occur in systems with dynamic DNS, for example, [Kubernetes](https://kubernetes.io), where nodes can be unresolvable during downtime, and this is not an error.

-    - Configuration error. ClickHouse configuration file contains a wrong hostname.
+    - Configuration error. ClickHouse configuration file contains a wrong hostname.

Possible values:

-- 1 — skipping enabled.
+- 1 — skipping enabled.

    If a shard is unavailable, ClickHouse returns a result based on partial data and does not report node availability issues.

-- 0 — skipping disabled.
+- 0 — skipping disabled.

    If a shard is unavailable, ClickHouse throws an exception.

@@ -1885,9 +1938,9 @@ Do not merge aggregation states from different servers for distributed query pro

Possible values:

-- `0` — Disabled (final query processing is done on the initiator node).
-- `1` - Do not merge aggregation states from different servers for distributed query processing (query completelly processed on the shard, initiator only proxy the data), can be used in case it is for certain that there are different keys on different shards.
-- `2` - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possible when the query processed completelly on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).
+- `0` — Disabled (final query processing is done on the initiator node).
+- `1` - Do not merge aggregation states from different servers for distributed query processing (query completely processed on the shard, the initiator only proxies the data), can be used when it is certain that there are different keys on different shards.
+- `2` - Same as `1` but applies `ORDER BY` and `LIMIT` (it is not possible when the query is processed completely on the remote node, like for `distributed_group_by_no_merge=1`) on the initiator (can be used for queries with `ORDER BY` and/or `LIMIT`).

Default value: `0`

@@ -1925,28 +1978,28 @@ FORMAT PrettyCompactMonoBlock

Enables or disables [LIMIT](#limit) applying on each shard separatelly.

This will allow to avoid:

-- Sending extra rows over network;
-- Processing rows behind the limit on the initiator.
+- Sending extra rows over network;
+- Processing rows behind the limit on the initiator.

Starting from 21.9 version you cannot get inaccurate results anymore, since `distributed_push_down_limit` changes query execution only if at least one of the conditions met:

-- [distributed_group_by_no_merge](#distributed-group-by-no-merge) > 0.
-- Query **does not have** `GROUP BY`/`DISTINCT`/`LIMIT BY`, but it has `ORDER BY`/`LIMIT`.
-- Query **has** `GROUP BY`/`DISTINCT`/`LIMIT BY` with `ORDER BY`/`LIMIT` and:
-    - [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled.
-    - [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key) is enabled.
+- [distributed_group_by_no_merge](#distributed-group-by-no-merge) > 0.
+- Query **does not have** `GROUP BY`/`DISTINCT`/`LIMIT BY`, but it has `ORDER BY`/`LIMIT`.
+- Query **has** `GROUP BY`/`DISTINCT`/`LIMIT BY` with `ORDER BY`/`LIMIT` and:
+    - [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled.
+    - [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key) is enabled.

Possible values:

-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Disabled.
+- 1 — Enabled.

Default value: `1`.

See also:

-- [distributed_group_by_no_merge](#distributed-group-by-no-merge)
-- [optimize_skip_unused_shards](#optimize-skip-unused-shards)
-- [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key)
+- [distributed_group_by_no_merge](#distributed-group-by-no-merge)
+- [optimize_skip_unused_shards](#optimize-skip-unused-shards)
+- [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key)

## optimize_skip_unused_shards_limit {#optimize-skip-unused-shards-limit}

@@ -1962,8 +2015,8 @@ Enables or disables skipping of unused shards for [SELECT](../../sql-reference/s

Possible values:

-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Disabled.
+- 1 — Enabled.

Default value: 0

@@ -1973,8 +2026,8 @@ Rewrite IN in query for remote shards to exclude values that does not belong to

Possible values:

-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Disabled.
+- 1 — Enabled.

Default value: 1 (since it requires `optimize_skip_unused_shards` anyway, which `0` by default)

@@ -1984,8 +2037,8 @@ Allow nondeterministic (like `rand` or `dictGet`, since later has some caveats w

Possible values:

-- 0 — Disallowed.
-- 1 — Allowed.
+- 0 — Disallowed.
+- 1 — Allowed.

Default value: 0

@@ -1995,9 +2048,9 @@ Controls [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) (hence st

Possible values:

-- 0 — Disabled, `optimize_skip_unused_shards` works always.
-- 1 — Enables `optimize_skip_unused_shards` only for the first level.
-- 2 — Enables `optimize_skip_unused_shards` up to the second level.
+- 0 — Disabled, `optimize_skip_unused_shards` works always.
+- 1 — Enables `optimize_skip_unused_shards` only for the first level.
+- 2 — Enables `optimize_skip_unused_shards` up to the second level.

Default value: 0

@@ -2007,9 +2060,9 @@ Enables or disables query execution if [optimize_skip_unused_shards](#optimize-s

Possible values:

-- 0 — Disabled. ClickHouse does not throw an exception.
-- 1 — Enabled. Query execution is disabled only if the table has a sharding key.
-- 2 — Enabled. Query execution is disabled regardless of whether a sharding key is defined for the table.
+- 0 — Disabled. ClickHouse does not throw an exception.
+- 1 — Enabled. Query execution is disabled only if the table has a sharding key.
+- 2 — Enabled. Query execution is disabled regardless of whether a sharding key is defined for the table.

Default value: 0

@@ -2019,9 +2072,9 @@ Controls [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shard

Possible values:

-- 0 - Disabled, `force_optimize_skip_unused_shards` works always.
-- 1 — Enables `force_optimize_skip_unused_shards` only for the first level.
-- 2 — Enables `force_optimize_skip_unused_shards` up to the second level.
+- 0 - Disabled, `force_optimize_skip_unused_shards` works always.
+- 1 — Enables `force_optimize_skip_unused_shards` only for the first level.
+- 2 — Enables `force_optimize_skip_unused_shards` up to the second level.

Default value: 0

@@ -2046,16 +2099,16 @@ The following types of queries are not supported (support for some of them may b

Possible values:

-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Disabled.
+- 1 — Enabled.

Default value: 0

See also:

-- [distributed_group_by_no_merge](#distributed-group-by-no-merge)
-- [distributed_push_down_limit](#distributed-push-down-limit)
-- [optimize_skip_unused_shards](#optimize-skip-unused-shards)
+- [distributed_group_by_no_merge](#distributed-group-by-no-merge)
+- [distributed_push_down_limit](#distributed-push-down-limit)
+- [optimize_skip_unused_shards](#optimize-skip-unused-shards)

:::note
Right now it requires `optimize_skip_unused_shards` (the reason behind this is that one day it may be enabled by default, and it will work correctly only if data was inserted via Distributed table, i.e. data is distributed according to sharding_key).

@@ -2069,8 +2122,8 @@ By default, `OPTIMIZE` returns successfully even if it didn’t do anything. Thi

Possible values:

-- 1 — Throwing an exception is enabled.
-- 0 — Throwing an exception is disabled.
+- 1 — Throwing an exception is enabled.
+- 0 — Throwing an exception is disabled.

Default value: 0.

@@ -2084,8 +2137,8 @@ By default, `OPTIMIZE TABLE ... FINAL` query rewrites the one part even if there

Possible values:

-- 1 - Enable optimization.
-- 0 - Disable optimization.
+- 1 - Enable optimization.
+- 0 - Disable optimization.

Default value: 0.

@@ -2095,19 +2148,19 @@ Enables or disables optimization by transforming some functions to reading subco

These functions can be transformed:

-- [length](../../sql-reference/functions/array-functions.md/#array_functions-length) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn.
-- [empty](../../sql-reference/functions/array-functions.md/#function-empty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn.
-- [notEmpty](../../sql-reference/functions/array-functions.md/#function-notempty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn.
-- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn.
-- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn.
-- [count](../../sql-reference/aggregate-functions/reference/count.md) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn.
-- [mapKeys](../../sql-reference/functions/tuple-map-functions.md/#mapkeys) to read the [keys](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn.
-- [mapValues](../../sql-reference/functions/tuple-map-functions.md/#mapvalues) to read the [values](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn.
+- [length](../../sql-reference/functions/array-functions.md/#array_functions-length) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn.
+- [empty](../../sql-reference/functions/array-functions.md/#function-empty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn.
+- [notEmpty](../../sql-reference/functions/array-functions.md/#function-notempty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn.
+- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn.
+- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn.
+- [count](../../sql-reference/aggregate-functions/reference/count.md) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn.
+- [mapKeys](../../sql-reference/functions/tuple-map-functions.md/#mapkeys) to read the [keys](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn.
+- [mapValues](../../sql-reference/functions/tuple-map-functions.md/#mapvalues) to read the [values](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn.

Possible values:

-- 0 — Optimization disabled.
-- 1 — Optimization enabled.
+- 0 — Optimization disabled.
+- 1 — Optimization enabled.

Default value: `0`.

@@ -2124,49 +2177,49 @@ Default value: `1`.

See also:

-- [optimize_functions_to_subcolumns](#optimize-functions-to-subcolumns)
+- [optimize_functions_to_subcolumns](#optimize-functions-to-subcolumns)

## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life}

-- Type: seconds
-- Default value: 60 seconds
+- Type: seconds
+- Default value: 60 seconds

Controls how fast errors in distributed tables are zeroed. If a replica is unavailable for some time, accumulates 5 errors, and distributed_replica_error_half_life is set to 1 second, then the replica is considered normal 3 seconds after the last error.
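+
+As a sketch of the decay: with a half-life of 1 second, 5 accumulated errors fall below 1 after three halvings (5 → 2.5 → 1.25 → 0.625), which is why the replica above is considered normal after 3 seconds. The window can be widened per session:
+
+```sql
+-- Keep error history around twice as long as the default 60 seconds.
+SET distributed_replica_error_half_life = 120;
+```
+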
See also:

-- [load_balancing](#load_balancing-round_robin)
-- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
-- [distributed_replica_error_cap](#settings-distributed_replica_error_cap)
-- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors)
+- [load_balancing](#load_balancing-round_robin)
+- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
+- [distributed_replica_error_cap](#settings-distributed_replica_error_cap)
+- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors)

## distributed_replica_error_cap {#settings-distributed_replica_error_cap}

-- Type: unsigned int
-- Default value: 1000
+- Type: unsigned int
+- Default value: 1000

The error count of each replica is capped at this value, preventing a single replica from accumulating too many errors.

See also:

-- [load_balancing](#load_balancing-round_robin)
-- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
-- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life)
-- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors)
+- [load_balancing](#load_balancing-round_robin)
+- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
+- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life)
+- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors)

## distributed_replica_max_ignored_errors {#settings-distributed_replica_max_ignored_errors}

-- Type: unsigned int
-- Default value: 0
+- Type: unsigned int
+- Default value: 0

The number of errors that will be ignored while choosing replicas (according to `load_balancing` algorithm).

See also:

-- [load_balancing](#load_balancing-round_robin)
-- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
-- [distributed_replica_error_cap](#settings-distributed_replica_error_cap)
-- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life)
+- [load_balancing](#load_balancing-round_robin)
+- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
+- [distributed_replica_error_cap](#settings-distributed_replica_error_cap)
+- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life)

## distributed_directory_monitor_sleep_time_ms {#distributed_directory_monitor_sleep_time_ms}

@@ -2174,7 +2227,7 @@ Base interval for the [Distributed](../../engines/table-engines/special/distribu

Possible values:

-- A positive integer number of milliseconds.
+- A positive integer number of milliseconds.

Default value: 100 milliseconds.

@@ -2184,7 +2237,7 @@ Maximum interval for the [Distributed](../../engines/table-engines/special/distr

Possible values:

-- A positive integer number of milliseconds.
+- A positive integer number of milliseconds.

Default value: 30000 milliseconds (30 seconds).

@@ -2196,8 +2249,8 @@ When batch sending is enabled, the [Distributed](../../engines/table-engines/spe

Possible values:

-- 1 — Enabled.
-- 0 — Disabled.
+- 1 — Enabled.
+- 0 — Disabled.

Default value: 0.

@@ -2211,8 +2264,8 @@ So installing this setting to `1` will disable batching for such batches (i.e. t

Possible values:

-- 1 — Enabled.
-- 0 — Disabled.
+- 1 — Enabled.
+- 0 — Disabled.

Default value: 0.

@@ -2234,7 +2287,7 @@ To use this setting, you need to set the `CAP_SYS_NICE` capability. The `clickho

Possible values:

-- You can set values in the range `[-20, 19]`.
+- You can set values in the range `[-20, 19]`.

Lower values mean higher priority. Threads with low `nice` priority values are executed more frequently than threads with high values. High values are preferable for long-running non-interactive queries because it allows them to quickly give up resources in favour of short interactive queries when they arrive.

@@ -2246,14 +2299,14 @@ Sets the period for a real clock timer of the [query profiler](../../operations/

Possible values:

-- Positive integer number, in nanoseconds.
+- Positive integer number, in nanoseconds.

    Recommended values:

    - 10000000 (100 times a second) nanoseconds and less for single queries.
    - 1000000000 (once a second) for cluster-wide profiling.

-- 0 for turning off the timer.
+- 0 for turning off the timer.

Type: [UInt64](../../sql-reference/data-types/int-uint.md).

Default value: 1000000000 nanoseconds (once a second).

See also:

-- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
+- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)

## query_profiler_cpu_time_period_ns {#query_profiler_cpu_time_period_ns}

@@ -2269,14 +2322,14 @@ Sets the period for a CPU clock timer of the [query profiler](../../operations/o

Possible values:

-- A positive integer number of nanoseconds.
+- A positive integer number of nanoseconds.

    Recommended values:

    - 10000000 (100 times a second) nanoseconds and more for single queries.
    - 1000000000 (once a second) for cluster-wide profiling.

-- 0 for turning off the timer.
+- 0 for turning off the timer.

Type: [UInt64](../../sql-reference/data-types/int-uint.md).

Default value: 1000000000 nanoseconds.

See also:

-- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
+- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)

## memory_profiler_step {#memory_profiler_step}

@@ -2292,9 +2345,9 @@ Sets the step of memory profiler. Whenever query memory usage becomes larger tha

Possible values:

-- A positive integer number of bytes.
+- A positive integer number of bytes.

-- 0 for turning off the memory profiler.
+- 0 for turning off the memory profiler.

Default value: 4,194,304 bytes (4 MiB).

@@ -2304,9 +2357,9 @@ Sets the probability of collecting stacktraces at random allocations and dealloc

Possible values:

-- A positive floating-point number in the range [0..1].
+- A positive floating-point number in the range [0..1].

-- 0.0 for turning off the memory sampling.
+- 0.0 for turning off the memory sampling.

Default value: 0.0.

@@ -2316,8 +2369,8 @@ Enables or disables collecting stacktraces on each update of profile events alon

Possible values:

-- 1 — Tracing of profile events enabled.
-- 0 — Tracing of profile events disabled.
+- 1 — Tracing of profile events enabled.
+- 0 — Tracing of profile events disabled.

Default value: 0.

@@ -2327,15 +2380,15 @@ Enables or disables [introspections functions](../../sql-reference/functions/int

Possible values:

-- 1 — Introspection functions enabled.
-- 0 — Introspection functions disabled.
+- 1 — Introspection functions enabled.
+- 0 — Introspection functions disabled.

Default value: 0.
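+
+Tying the profiler settings above together, the sampling period can be tightened for a single query. A minimal sketch (the table name is illustrative):
+
+```sql
+-- Sample CPU stacks 100 times per second for this query only.
+SELECT count() FROM big_table
+SETTINGS query_profiler_cpu_time_period_ns = 10000000;
+```
+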
**See Also**

-- [Sampling Query Profiler](../../operations/optimizing-performance/sampling-query-profiler.md)
-- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
+- [Sampling Query Profiler](../../operations/optimizing-performance/sampling-query-profiler.md)
+- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)

## input_format_parallel_parsing {#input-format-parallel-parsing}

@@ -2343,8 +2396,8 @@ Enables or disables order-preserving parallel parsing of data formats. Supported

Possible values:

-- 1 — Enabled.
-- 0 — Disabled.
+- 1 — Enabled.
+- 0 — Disabled.

Default value: `1`.

@@ -2354,15 +2407,15 @@ Enables or disables parallel formatting of data formats. Supported only for [TSV

Possible values:

-- 1 — Enabled.
-- 0 — Disabled.
+- 1 — Enabled.
+- 0 — Disabled.

Default value: `1`.

## min_chunk_bytes_for_parallel_parsing {#min-chunk-bytes-for-parallel-parsing}

-- Type: unsigned int
-- Default value: 1 MiB
+- Type: unsigned int
+- Default value: 1 MiB

The minimum chunk size in bytes, which each thread will parse in parallel.

@@ -2372,7 +2425,7 @@ Sleep time for merge selecting when no part is selected. A lower setting trigger

Possible values:

-- Any positive integer.
+- Any positive integer.

Default value: `5000`.

@@ -2384,9 +2437,9 @@ If we execute `INSERT INTO distributed_table_a SELECT ... FROM distributed_table

Possible values:

-- 0 — Disabled.
-- 1 — `SELECT` will be executed on each shard from the underlying table of the distributed engine.
-- 2 — `SELECT` and `INSERT` will be executed on each shard from/to the underlying table of the distributed engine.
+- 0 — Disabled.
+- 1 — `SELECT` will be executed on each shard from the underlying table of the distributed engine.
+- 2 — `SELECT` and `INSERT` will be executed on each shard from/to the underlying table of the distributed engine.

Default value: 0.

@@ -2398,15 +2451,15 @@ By default, when inserting data into a `Distributed` table, the ClickHouse serve

Possible values:

-- 0 — Data is inserted in asynchronous mode.
-- 1 — Data is inserted in synchronous mode.
+- 0 — Data is inserted in asynchronous mode.
+- 1 — Data is inserted in synchronous mode.

Default value: `0`.

**See Also**

-- [Distributed Table Engine](../../engines/table-engines/special/distributed.md/#distributed)
-- [Managing Distributed Tables](../../sql-reference/statements/system.md/#query-language-system-distributed)
+- [Distributed Table Engine](../../engines/table-engines/special/distributed.md/#distributed)
+- [Managing Distributed Tables](../../sql-reference/statements/system.md/#query-language-system-distributed)

## insert_shard_id {#insert_shard_id}

@@ -2422,8 +2475,8 @@ SELECT uniq(shard_num) FROM system.clusters WHERE cluster = 'requested_cluster';

Possible values:

-- 0 — Disabled.
-- Any number from `1` to `shards_num` of corresponding [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table.
+- 0 — Disabled.
+- Any number from `1` to `shards_num` of corresponding [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table.

Default value: `0`.

@@ -2461,8 +2514,8 @@ Uses compact format for storing blocks for async (`insert_distributed_sync`) INS

Possible values:

-- 0 — Uses `user[:password]@host:port#default_database` directory format.
-- 1 — Uses `[shard{shard_index}[_replica{replica_index}]]` directory format.
+- 0 — Uses `user[:password]@host:port#default_database` directory format.
+- 1 — Uses `[shard{shard_index}[_replica{replica_index}]]` directory format.

Default value: `1`.

@@ -2495,14 +2548,14 @@ When merging is prohibited, the replica never merges parts and always downloads

Possible values:

-- 0 — `Replicated*MergeTree`-engine tables merge data parts at the replica.
-- 1 — `Replicated*MergeTree`-engine tables do not merge data parts at the replica. The tables download merged data parts from other replicas.
+- 0 — `Replicated*MergeTree`-engine tables merge data parts at the replica.
+- 1 — `Replicated*MergeTree`-engine tables do not merge data parts at the replica. The tables download merged data parts from other replicas.

Default value: 0.

**See Also**

-- [Data Replication](../../engines/table-engines/mergetree-family/replication.md)
+- [Data Replication](../../engines/table-engines/mergetree-family/replication.md)

## background_distributed_schedule_pool_size {#background_distributed_schedule_pool_size}

@@ -2531,8 +2584,8 @@ By default, `NULL` values can’t be compared because `NULL` means undefined val

Possible values:

-- 0 — Comparison of `NULL` values in `IN` operator returns `false`.
-- 1 — Comparison of `NULL` values in `IN` operator returns `true`.
+- 0 — Comparison of `NULL` values in `IN` operator returns `false`.
+- 1 — Comparison of `NULL` values in `IN` operator returns `true`.

Default value: 0.

@@ -2579,7 +2632,7 @@ Result:

**See Also**

-- [NULL Processing in IN Operators](../../sql-reference/operators/in.md/#in-null-processing)
+- [NULL Processing in IN Operators](../../sql-reference/operators/in.md/#in-null-processing)

## low_cardinality_max_dictionary_size {#low_cardinality_max_dictionary_size}

@@ -2587,7 +2640,7 @@ Sets a maximum size in rows of a shared global dictionary for the [LowCardinalit

Possible values:

-- Any positive integer.
+- Any positive integer.

Default value: 8192.

@@ -2599,8 +2652,8 @@ By default, the ClickHouse server monitors the size of dictionaries and if a dic

Possible values:

-- 1 — Creating several dictionaries for the data part is prohibited.
-- 0 — Creating several dictionaries for the data part is not prohibited.
+- 1 — Creating several dictionaries for the data part is prohibited.
+- 0 — Creating several dictionaries for the data part is not prohibited.

Default value: 0.

@@ -2614,8 +2667,8 @@ This setting is required mainly for third-party clients which do not support `Lo

Possible values:

-- 1 — Usage of `LowCardinality` is not restricted.
-- 0 — Usage of `LowCardinality` is restricted.
+- 1 — Usage of `LowCardinality` is not restricted.
+- 0 — Usage of `LowCardinality` is restricted.

Default value: 1.

@@ -2625,16 +2678,16 @@ Allows or restricts using [LowCardinality](../../sql-reference/data-types/lowcar

For small fixed values using of `LowCardinality` is usually inefficient, because ClickHouse stores a numeric index for each row. As a result:

-- Disk space usage can rise.
-- RAM consumption can be higher, depending on a dictionary size.
-- Some functions can work slower due to extra coding/encoding operations.
+- Disk space usage can rise.
+- RAM consumption can be higher, depending on a dictionary size.
+- Some functions can work slower due to extra coding/encoding operations.

Merge times in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)-engine tables can grow due to all the reasons described above.

Possible values:

-- 1 — Usage of `LowCardinality` is not restricted.
-- 0 — Usage of `LowCardinality` is restricted.
+- 1 — Usage of `LowCardinality` is not restricted.
+- 0 — Usage of `LowCardinality` is restricted.

Default value: 0.

@@ -2644,14 +2697,14 @@ Sets the minimum number of rows in the block which can be inserted into a table

Possible values:

-- Any positive integer.
-- 0 — Squashing disabled.
+- Any positive integer.
+- 0 — Squashing disabled.

Default value: 1048576.

**See Also**

-- [min_insert_block_size_rows](#min-insert-block-size-rows)
+- [min_insert_block_size_rows](#min-insert-block-size-rows)

## min_insert_block_size_bytes_for_materialized_views {#min-insert-block-size-bytes-for-materialized-views}

@@ -2659,14 +2712,14 @@ Sets the minimum number of bytes in the block which can be inserted into a table

Possible values:

-- Any positive integer.
-- 0 — Squashing disabled.
+- Any positive integer.
+- 0 — Squashing disabled.

Default value: 268435456.

**See also**

-- [min_insert_block_size_bytes](#min-insert-block-size-bytes)
+- [min_insert_block_size_bytes](#min-insert-block-size-bytes)

## optimize_read_in_order {#optimize_read_in_order}

@@ -2674,14 +2727,14 @@ Enables [ORDER BY](../../sql-reference/statements/select/order-by.md/#optimize_r

Possible values:

-- 0 — `ORDER BY` optimization is disabled.
-- 1 — `ORDER BY` optimization is enabled.
+- 0 — `ORDER BY` optimization is disabled.
+- 1 — `ORDER BY` optimization is enabled.

Default value: `1`.

**See Also**

-- [ORDER BY Clause](../../sql-reference/statements/select/order-by.md/#optimize_read_in_order)
+- [ORDER BY Clause](../../sql-reference/statements/select/order-by.md/#optimize_read_in_order)

## optimize_aggregation_in_order {#optimize_aggregation_in_order}

@@ -2689,14 +2742,14 @@ Enables [GROUP BY](../../sql-reference/statements/select/group-by.md) optimizati

Possible values:

-- 0 — `GROUP BY` optimization is disabled.
-- 1 — `GROUP BY` optimization is enabled.
+- 0 — `GROUP BY` optimization is disabled.
+- 1 — `GROUP BY` optimization is enabled.

Default value: `0`.

**See Also**

-- [GROUP BY optimization](../../sql-reference/statements/select/group-by.md/#aggregation-in-order)
+- [GROUP BY optimization](../../sql-reference/statements/select/group-by.md/#aggregation-in-order)

## mutations_sync {#mutations_sync}

@@ -2704,16 +2757,16 @@ Allows to execute `ALTER TABLE ... UPDATE|DELETE` queries ([mutations](../../sql

Possible values:

-- 0 - Mutations execute asynchronously.
-- 1 - The query waits for all mutations to complete on the current server.
-- 2 - The query waits for all mutations to complete on all replicas (if they exist).
+- 0 - Mutations execute asynchronously.
+- 1 - The query waits for all mutations to complete on the current server.
+- 2 - The query waits for all mutations to complete on all replicas (if they exist).

Default value: `0`.

**See Also**

-- [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries)
-- [Mutations](../../sql-reference/statements/alter/index.md#mutations)
+- [Synchronicity of ALTER Queries](../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries)
+- [Mutations](../../sql-reference/statements/alter/index.md#mutations)

## ttl_only_drop_parts {#ttl_only_drop_parts}

@@ -2727,15 +2780,15 @@ Dropping whole parts instead of partial cleaning TTL-d rows allows having shorte

Possible values:

-- 0 — The complete dropping of data parts is disabled.
-- 1 — The complete dropping of data parts is enabled.
+- 0 — The complete dropping of data parts is disabled.
+- 1 — The complete dropping of data parts is enabled.

Default value: `0`.
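+
+A minimal sketch of enabling it per session:
+
+```sql
+-- Expired parts are dropped whole instead of being rewritten row by row.
+SET ttl_only_drop_parts = 1;
+```
+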
**See Also**

-- [CREATE TABLE query clauses and settings](../../engines/table-engines/mergetree-family/mergetree.md/#mergetree-query-clauses) (`merge_with_ttl_timeout` setting)
-- [Table TTL](../../engines/table-engines/mergetree-family/mergetree.md/#mergetree-table-ttl)
+- [CREATE TABLE query clauses and settings](../../engines/table-engines/mergetree-family/mergetree.md/#mergetree-query-clauses) (`merge_with_ttl_timeout` setting)
+- [Table TTL](../../engines/table-engines/mergetree-family/mergetree.md/#mergetree-table-ttl)

## lock_acquire_timeout {#lock_acquire_timeout}

@@ -2745,8 +2798,8 @@ Locking timeout is used to protect from deadlocks while executing read/write ope

Possible values:

-- Positive integer (in seconds).
-- 0 — No locking timeout.
+- Positive integer (in seconds).
+- 0 — No locking timeout.

Default value: `120` seconds.

@@ -2758,8 +2811,8 @@ When the setting is enabled and the argument of `CAST` function is `Nullable`, t

Possible values:

-- 0 — The `CAST` result has exactly the destination type specified.
-- 1 — If the argument type is `Nullable`, the `CAST` result is transformed to `Nullable(DestinationDataType)`.
+- 0 — The `CAST` result has exactly the destination type specified.
+- 1 — If the argument type is `Nullable`, the `CAST` result is transformed to `Nullable(DestinationDataType)`.

Default value: `0`.

@@ -2797,7 +2850,7 @@ Result:

**See Also**

-- [CAST](../../sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) function
+- [CAST](../../sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) function

## system_events_show_zero_values {#system_events_show_zero_values}

@@ -2807,8 +2860,8 @@ Some monitoring systems require passing all the metrics values to them for each

Possible values:

-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Disabled.
+- 1 — Enabled.

Default value: `0`.

@@ -2866,8 +2919,8 @@ It is implemented via query rewrite (similar to [count_distinct_implementation](

Possible values:

-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Disabled.
+- 1 — Enabled.

Default value: 0.

@@ -2898,9 +2951,9 @@ Sets a mode for combining `SELECT` query results. The setting is only used when

Possible values:

-- `'DISTINCT'` — ClickHouse outputs rows as a result of combining queries removing duplicate rows.
-- `'ALL'` — ClickHouse outputs all rows as a result of combining queries including duplicate rows.
-- `''` — ClickHouse generates an exception when used with `UNION`.
+- `'DISTINCT'` — ClickHouse outputs rows as a result of combining queries removing duplicate rows.
+- `'ALL'` — ClickHouse outputs all rows as a result of combining queries including duplicate rows.
+- `''` — ClickHouse generates an exception when used with `UNION`.

Default value: `''`.

@@ -2977,8 +3030,8 @@ Enables special logic to perform merges on replicas.

Possible values:

-- Positive integer (in seconds).
-- 0 — Special merges logic is not used. Merges happen in the usual way on all the replicas.
+- Positive integer (in seconds).
+- 0 — Special merges logic is not used. Merges happen in the usual way on all the replicas.

Default value: `0`.

@@ -2996,8 +3049,8 @@ Sets the maximum number of parallel threads for the `SELECT` query data read pha

Possible values:

-- Positive integer.
-- 0 or 1 — Disabled. `SELECT` queries are executed in a single thread.
+- Positive integer.
+- 0 or 1 — Disabled. `SELECT` queries are executed in a single thread.

Default value: `16`.
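+
+A sketch, assuming the setting described above is `max_final_threads` as in current ClickHouse (the table is illustrative):
+
+```sql
+-- Read the FINAL pass with 8 parallel threads instead of the default 16.
+SELECT * FROM t_replacing FINAL
+SETTINGS max_final_threads = 8;
+```
+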
@@ -3007,9 +3060,9 @@ Sets the probability that the ClickHouse can start a trace for executed queries

Possible values:

-- 0 — The trace for all executed queries is disabled (if no parent trace context is supplied).
-- Positive floating-point number in the range [0..1]. For example, if the setting value is `0,5`, ClickHouse can start a trace on average for half of the queries.
-- 1 — The trace for all executed queries is enabled.
+- 0 — The trace for all executed queries is disabled (if no parent trace context is supplied).
+- Positive floating-point number in the range [0..1]. For example, if the setting value is `0.5`, ClickHouse can start a trace on average for half of the queries.
+- 1 — The trace for all executed queries is enabled.

Default value: `0`.

@@ -3019,8 +3072,8 @@ Enables or disables data transformation before the insertion, as if merge was do

Possible values:

-- 0 — Disabled.
-- 1 — Enabled.
+- 0 — Disabled.
+- 1 — Enabled.

Default value: 1.

@@ -3149,8 +3202,8 @@ Allows working with experimental [geo data types](../../sql-reference/data-types

Possible values:

-- 0 — Working with geo data types is disabled.
-- 1 — Working with geo data types is enabled.
+- 0 — Working with geo data types is disabled.
+- 1 — Working with geo data types is enabled.

Default value: `0`.

@@ -3160,8 +3213,8 @@ Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.

Possible values:

-- 0 — Queries will be executed with delay.
-- 1 — Queries will be executed without delay.
+- 0 — Queries will be executed with delay.
+- 1 — Queries will be executed without delay.

Default value: `0`.

@@ -3171,8 +3224,8 @@ Sets the `SHOW TABLE` query display.

Possible values:

-- 0 — The query will be displayed without table UUID.
-- 1 — The query will be displayed with table UUID.
+- 0 — The query will be displayed without table UUID.
+- 1 — The query will be displayed with table UUID.

Default value: `0`.

@@ -3182,8 +3235,8 @@ Allows creation of experimental [live views](../../sql-reference/statements/crea

Possible values:

-- 0 — Working with live views is disabled.
-- 1 — Working with live views is enabled.
+- 0 — Working with live views is disabled.
+- 1 — Working with live views is enabled.

Default value: `0`.

@@ -3211,8 +3264,8 @@ HTTP connection timeout (in seconds).

Possible values:

-- Any positive integer.
-- 0 - Disabled (infinite timeout).
+- Any positive integer.
+- 0 - Disabled (infinite timeout).

Default value: 1.

@@ -3222,8 +3275,8 @@ HTTP send timeout (in seconds).

Possible values:

-- Any positive integer.
-- 0 - Disabled (infinite timeout).
+- Any positive integer.
+- 0 - Disabled (infinite timeout).

Default value: 180.

@@ -3233,8 +3286,8 @@ HTTP receive timeout (in seconds).

Possible values:

-- Any positive integer.
-- 0 - Disabled (infinite timeout).
+- Any positive integer.
+- 0 - Disabled (infinite timeout).

Default value: 180.

@@ -3244,8 +3297,8 @@ Defines the level of detail for the [CHECK TABLE](../../sql-reference/statements

Possible values:

-- 0 — the query shows a check status for every individual data part of a table.
-- 1 — the query shows the general table check status.
+- 0 — the query shows a check status for every individual data part of a table.
+- 1 — the query shows the general table check status.

Default value: `0`.

@@ -3299,8 +3352,8 @@ Sets the maximum number of rows to get from the query result. It adjusts the val

Possible values:

-- 0 — The number of rows is not limited.
-- Positive integer.
+- 0 — The number of rows is not limited.
+- Positive integer.

Default value: `0`.
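+
+A minimal sketch, assuming the setting described above is `limit` as in current ClickHouse (its `offset` counterpart is described next):
+
+```sql
+-- Return at most 10 rows regardless of the query's own LIMIT clause.
+SET limit = 10;
+SELECT number FROM system.numbers; -- stops after 10 rows
+```
+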
@@ -3310,8 +3363,8 @@ Sets the number of rows to skip before starting to return rows from the query. I Possible values: -- 0 — No rows are skipped . -- Positive integer. +- 0 — No rows are skipped. +- Positive integer. Default value: `0`. @@ -3347,8 +3400,8 @@ Enables to fuse aggregate functions with identical argument. It rewrites query c Possible values: -- 0 — Functions with identical argument are not fused. -- 1 — Functions with identical argument are fused. +- 0 — Functions with an identical argument are not fused. +- 1 — Functions with an identical argument are fused. Default value: `0`. @@ -3388,8 +3441,8 @@ Enables to create databases with [Replicated](../../engines/database-engines/rep Possible values: -- 0 — Disabled. -- 1 — Enabled. +- 0 — Disabled. +- 1 — Enabled. Default value: `0`. @@ -3399,8 +3452,8 @@ Sets how long initial DDL query should wait for Replicated database to precess p Possible values: -- Positive integer. -- 0 — Unlimited. +- Positive integer. +- 0 — Unlimited. Default value: `300`. @@ -3410,9 +3463,9 @@ Sets timeout for DDL query responses from all hosts in cluster. If a DDL request Possible values: -- Positive integer. -- 0 — Async mode. -- Negative integer — infinite timeout. +- Positive integer. +- 0 — Async mode. +- Negative integer — Infinite timeout. Default value: `180`. @@ -3422,10 +3475,10 @@ Sets format of distributed DDL query result. Possible values: -- `throw` — Returns result set with query execution status for all hosts where query is finished. If query has failed on some hosts, then it will rethrow the first exception. If query is not finished yet on some hosts and [distributed_ddl_task_timeout](#distributed_ddl_task_timeout) exceeded, then it throws `TIMEOUT_EXCEEDED` exception. -- `none` — Is similar to throw, but distributed DDL query returns no result set. -- `null_status_on_timeout` — Returns `NULL` as execution status in some rows of result set instead of throwing `TIMEOUT_EXCEEDED` if query is not finished on the corresponding hosts. -- `never_throw` — Do not throw `TIMEOUT_EXCEEDED` and do not rethrow exceptions if query has failed on some hosts. +- `throw` — Returns a result set with the query execution status for all hosts where the query has finished. If the query has failed on some hosts, it rethrows the first exception. If the query has not finished yet on some hosts and [distributed_ddl_task_timeout](#distributed_ddl_task_timeout) is exceeded, it throws a `TIMEOUT_EXCEEDED` exception. +- `none` — Similar to `throw`, but the distributed DDL query returns no result set. +- `null_status_on_timeout` — Returns `NULL` as the execution status in some rows of the result set instead of throwing `TIMEOUT_EXCEEDED` if the query has not finished on the corresponding hosts. +- `never_throw` — Does not throw `TIMEOUT_EXCEEDED` and does not rethrow exceptions if the query has failed on some hosts. Default value: `throw`. @@ -3435,8 +3488,8 @@ Sets the data format of a [nested](../../sql-reference/data-types/nested-data-st Possible values: -- 1 — Nested column is flattened to separate arrays. -- 0 — Nested column stays a single array of tuples. +- 1 — Nested column is flattened to separate arrays. +- 0 — Nested column stays a single array of tuples. Default value: `1`. @@ -3500,8 +3553,8 @@ Defines how [mysql](../../sql-reference/table-functions/mysql.md), [postgresql]( Possible values: -- 0 — The table function explicitly uses Nullable columns.
+- 1 — The table function implicitly uses Nullable columns. Default value: `1`. @@ -3515,8 +3568,8 @@ Enables or disables [projection](../../engines/table-engines/mergetree-family/me Possible values: -- 0 — Projection optimization disabled. -- 1 — Projection optimization enabled. +- 0 — Projection optimization disabled. +- 1 — Projection optimization enabled. Default value: `1`. @@ -3526,8 +3579,8 @@ Enables or disables the obligatory use of [projections](../../engines/table-engi Possible values: -- 0 — Projection optimization is not obligatory. -- 1 — Projection optimization is obligatory. +- 0 — Projection optimization is not obligatory. +- 1 — Projection optimization is obligatory. Default value: `0`. @@ -3537,9 +3590,9 @@ Allows to set up waiting for actions to be executed on replicas by [ALTER](../.. Possible values: -- 0 — Do not wait. -- 1 — Wait for own execution. -- 2 — Wait for everyone. +- 0 — Do not wait. +- 1 — Wait for own execution. +- 2 — Wait for everyone. Default value: `1`. @@ -3549,9 +3602,9 @@ Specifies how long (in seconds) to wait for inactive replicas to execute [ALTER] Possible values: -- 0 — Do not wait. -- Negative integer — Wait for unlimited time. -- Positive integer — The number of seconds to wait. +- 0 — Do not wait. +- Negative integer — Wait for unlimited time. +- Positive integer — The number of seconds to wait. Default value: `120` seconds. @@ -3561,7 +3614,7 @@ Sets the maximum number of matches for a single regular expression per row. Use Possible values: -- Positive integer. +- Positive integer. Default value: `1000`. @@ -3571,7 +3624,7 @@ Sets the maximum number of retries during a single HTTP read. Possible values: -- Positive integer. +- Positive integer. Default value: `1024`. @@ -3581,9 +3634,9 @@ Allows a user to write to [query_log](../../operations/system-tables/query_log.m Possible values: -- 0 — Queries are not logged in the system tables. -- Positive floating-point number in the range [0..1]. For example, if the setting value is `0.5`, about half of the queries are logged in the system tables. -- 1 — All queries are logged in the system tables. +- 0 — Queries are not logged in the system tables. +- Positive floating-point number in the range [0..1]. For example, if the setting value is `0.5`, about half of the queries are logged in the system tables. +- 1 — All queries are logged in the system tables. Default value: `1`. @@ -3593,9 +3646,9 @@ Allows calculating the [if](../../sql-reference/functions/conditional-functions. Possible values: -- `enable` — Enables short-circuit function evaluation for functions that are suitable for it (can throw an exception or computationally heavy). -- `force_enable` — Enables short-circuit function evaluation for all functions. -- `disable` — Disables short-circuit function evaluation. +- `enable` — Enables short-circuit function evaluation for functions that are suitable for it (they can throw an exception or are computationally heavy). +- `force_enable` — Enables short-circuit function evaluation for all functions. +- `disable` — Disables short-circuit function evaluation. Default value: `enable`. @@ -3605,8 +3658,8 @@ Defines the maximum length for each regular expression in the [hyperscan multi-m Possible values: -- Positive integer. -- 0 - The length is not limited. +- Positive integer. +- 0 — The length is not limited. Default value: `0`. @@ -3640,7 +3693,7 @@ Exception: Regexp length too large.
**See Also** -- [max_hyperscan_regexp_total_length](#max-hyperscan-regexp-total-length) +- [max_hyperscan_regexp_total_length](#max-hyperscan-regexp-total-length) ## max_hyperscan_regexp_total_length {#max-hyperscan-regexp-total-length} @@ -3648,8 +3701,8 @@ Sets the maximum length total of all regular expressions in each [hyperscan mult Possible values: -- Positive integer. -- 0 - The length is not limited. +- Positive integer. +- 0 — The length is not limited. Default value: `0`. @@ -3683,7 +3736,7 @@ Exception: Total regexp lengths too large. **See Also** -- [max_hyperscan_regexp_length](#max-hyperscan-regexp-length) +- [max_hyperscan_regexp_length](#max-hyperscan-regexp-length) ## enable_positional_arguments {#enable-positional-arguments} @@ -3691,8 +3744,8 @@ Enables or disables supporting positional arguments for [GROUP BY](../../sql-ref Possible values: -- 0 — Positional arguments aren't supported. -- 1 — Positional arguments are supported: column numbers can use instead of column names. +- 0 — Positional arguments aren't supported. +- 1 — Positional arguments are supported: column numbers can be used instead of column names. Default value: `1`. @@ -3721,13 +3774,13 @@ Result: ## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions} Enables or disables returning results of type: -- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md/#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md/#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md/#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md/#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md/#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md/#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md/#tolastdayofmonth). -- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md/#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md/#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md/#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md/#timeslot). +- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md/#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md/#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md/#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md/#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md/#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md/#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md/#tolastdayofmonth).
+- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md/#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md/#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md/#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md/#timeslot). Possible values: -- 0 — Functions return `Date` or `DateTime` for all types of arguments. -- 1 — Functions return `Date32` or `DateTime64` for `Date32` or `DateTime64` arguments and `Date` or `DateTime` otherwise. +- 0 — Functions return `Date` or `DateTime` for all types of arguments. +- 1 — Functions return `Date32` or `DateTime64` for `Date32` or `DateTime64` arguments and `Date` or `DateTime` otherwise. Default value: `0`. @@ -3739,8 +3792,8 @@ Works only for [*MergeTree](../../engines/table-engines/mergetree-family/index.m Possible values: -- 0 — Automatic `PREWHERE` optimization is disabled. -- 1 — Automatic `PREWHERE` optimization is enabled. +- 0 — Automatic `PREWHERE` optimization is disabled. +- 1 — Automatic `PREWHERE` optimization is enabled. Default value: `1`. @@ -3752,14 +3805,14 @@ Works only for [*MergeTree](../../engines/table-engines/mergetree-family/index.m Possible values: -- 0 — Automatic `PREWHERE` optimization in `SELECT` queries with `FINAL` modifier is disabled. -- 1 — Automatic `PREWHERE` optimization in `SELECT` queries with `FINAL` modifier is enabled. +- 0 — Automatic `PREWHERE` optimization in `SELECT` queries with `FINAL` modifier is disabled. +- 1 — Automatic `PREWHERE` optimization in `SELECT` queries with `FINAL` modifier is enabled. Default value: `0`. **See Also** -- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting +- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting ## optimize_using_constraints @@ -3791,8 +3844,8 @@ Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/de Possible values: -- 0 — Subcolumns are not included in `DESCRIBE` queries. -- 1 — Subcolumns are included in `DESCRIBE` queries. +- 0 — Subcolumns are not included in `DESCRIBE` queries. +- 1 — Subcolumns are included in `DESCRIBE` queries. Default value: `0`. @@ -3808,8 +3861,8 @@ Applicable to [ATTACH PARTITION|PART](../../sql-reference/statements/alter/parti Possible values: -- 0 — disable verbosity. -- 1 — enable verbosity. +- 0 — Disable verbosity. +- 1 — Enable verbosity. Default value: `0`. @@ -3842,8 +3895,8 @@ This is an experimental setting. Sets the minimum amount of memory for reading l Possible values: -- Positive integer. -- 0 — Big files read with only copying data from kernel to userspace. +- Positive integer. +- 0 — Big files are read by only copying data from kernel to userspace. Default value: `0`. @@ -3853,8 +3906,8 @@ Enables or disables waiting unfinished queries when shutdown server. Possible values: -- 0 — Disabled. -- 1 — Enabled. The wait time equal shutdown_wait_unfinished config. +- 0 — Disabled. +- 1 — Enabled. The wait time equals the `shutdown_wait_unfinished` config value. Default value: 0.
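For context, the unfinished queries that the server would wait for at shutdown can be inspected at runtime; a minimal sketch using the standard columns of `system.processes`:

``` sql
-- Shows currently running queries and how long they have been executing.
SELECT query_id, elapsed, query FROM system.processes;
```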
@@ -3966,8 +4019,8 @@ INSERT INTO FUNCTION null('foo String') VALUES ('bar') SETTINGS max_threads=1; Possible values: -- 0 — Disallow. -- 1 — Allow. +- 0 — Disallow. +- 1 — Allow. Default value: `0`. @@ -4043,6 +4096,47 @@ Possible values: Default value: `0`. +## async_socket_for_remote {#async_socket_for_remote} + +Enables asynchronous read from the socket while executing a remote query. + +Enabled by default. + +## async_query_sending_for_remote {#async_query_sending_for_remote} + +Enables asynchronous connection creation and query sending while executing a remote query. + +Enabled by default. + +## use_hedged_requests {#use_hedged_requests} + +Enables hedged requests logic for remote queries. It allows establishing multiple connections with different replicas for a query. +A new connection is opened if the existing connection(s) with the replica(s) were not established within `hedged_connection_timeout` +or if no data was received within `receive_data_timeout`. The query uses the first connection that sends a non-empty progress packet (or a data packet, if `allow_changing_replica_until_first_data_packet` is enabled); +other connections are cancelled. Queries with `max_parallel_replicas > 1` are supported. + +Enabled by default. + +## hedged_connection_timeout {#hedged_connection_timeout} + +If a connection with a replica cannot be established within this timeout in hedged requests, ClickHouse starts working with the next replica without cancelling the connection to the previous one. +The timeout value is in milliseconds. + +Default value: `50`. + +## receive_data_timeout {#receive_data_timeout} + +This timeout is set when the query is sent to the replica in hedged requests. If the first packet of data is not received and no progress is made in query execution within this timeout, +ClickHouse starts working with the next replica, without cancelling the connection to the previous one. +The timeout value is in milliseconds. + +Default value: `2000`. + +## allow_changing_replica_until_first_data_packet {#allow_changing_replica_until_first_data_packet} + +If enabled, in hedged requests a new connection can be started until the first data packet is received, even if some progress has already been made +(but the progress has not been updated for `receive_data_timeout`); otherwise, changing the replica is disabled after the first time progress is made. + ## partial_result_on_first_cancel {#partial_result_on_first_cancel} When set to `true` and the user wants to interrupt a query (for example using `Ctrl+C` on the client), then the query continues execution only on data that was already read from the table. Afterwards, it will return a partial result of the query for the part of the table that was read. To fully stop the execution of a query without a partial result, the user should send 2 cancel requests. @@ -4087,8 +4181,8 @@ SELECT JSON_VALUE('{"hello":"world"}', '$.b') settings function_json_value_retur Possible values: -- true — Allow. -- false — Disallow. +- true — Allow. +- false — Disallow. Default value: `false`. @@ -4108,7 +4202,7 @@ SELECT JSON_VALUE('{"hello":{"world":"!"}}', '$.hello') settings function_json_v Possible values: -- true — Allow. -- false — Disallow. +- true — Allow. +- false — Disallow. Default value: `false`. diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index af7c526e29b..ac6ea22ab75 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -45,11 +45,11 @@ Configuration markup: Required parameters: -- `endpoint` — HDFS endpoint URL in `path` format.
Endpoint URL should contain a root path to store data. +- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data. Optional parameters: -- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`. +- `min_bytes_for_seek` — The minimal number of bytes to use a seek operation instead of a sequential read. Default value: `1 Mb`. ## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system} @@ -78,16 +78,16 @@ When writing the same file to `disk2`, it will actually be written to the physic Required parameters: -- `type` — `encrypted`. Otherwise the encrypted disk is not created. -- `disk` — Type of disk for data storage. -- `key` — The key for encryption and decryption. Type: [Uint64](/docs/en/sql-reference/data-types/int-uint.md). You can use `key_hex` parameter to encode the key in hexadecimal form. +- `type` — `encrypted`. Otherwise the encrypted disk is not created. +- `disk` — Type of disk for data storage. +- `key` — The key for encryption and decryption. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). You can use the `key_hex` parameter to encode the key in hexadecimal form. You can specify multiple keys using the `id` attribute (see example above). Optional parameters: -- `path` — Path to the location on the disk where the data will be saved. If not specified, the data will be saved in the root directory. -- `current_key_id` — The key used for encryption. All the specified keys can be used for decryption, and you can always switch to another key while maintaining access to previously encrypted data. -- `algorithm` — [Algorithm](/docs/en/sql-reference/statements/create/table.md/#create-query-encryption-codecs) for encryption. Possible values: `AES_128_CTR`, `AES_192_CTR` or `AES_256_CTR`. Default value: `AES_128_CTR`. The key length depends on the algorithm: `AES_128_CTR` — 16 bytes, `AES_192_CTR` — 24 bytes, `AES_256_CTR` — 32 bytes. +- `path` — Path to the location on the disk where the data will be saved. If not specified, the data will be saved in the root directory. +- `current_key_id` — The key used for encryption. All the specified keys can be used for decryption, and you can always switch to another key while maintaining access to previously encrypted data. +- `algorithm` — [Algorithm](/docs/en/sql-reference/statements/create/table.md/#create-query-encryption-codecs) for encryption. Possible values: `AES_128_CTR`, `AES_192_CTR` or `AES_256_CTR`. Default value: `AES_128_CTR`. The key length depends on the algorithm: `AES_128_CTR` — 16 bytes, `AES_192_CTR` — 24 bytes, `AES_256_CTR` — 32 bytes. Example of disk configuration: @@ -446,14 +446,14 @@ SETTINGS storage_policy='web'; Required parameters: -- `type` — `web`. Otherwise the disk is not created. -- `endpoint` — The endpoint URL in `path` format. Endpoint URL must contain a root path to store data, where they were uploaded. +- `type` — `web`. Otherwise the disk is not created. +- `endpoint` — The endpoint URL in `path` format. The endpoint URL must contain a root path to store data, where the data was uploaded. Optional parameters: -- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1` Mb. -- `remote_fs_read_backoff_threashold` — The maximum wait time when trying to read data for remote disk. Default value: `10000` seconds. -- `remote_fs_read_backoff_max_tries` — The maximum number of attempts to read with backoff.
Default value: `5`. +- `min_bytes_for_seek` — The minimal number of bytes to use a seek operation instead of a sequential read. Default value: `1` Mb. +- `remote_fs_read_backoff_threashold` — The maximum wait time when trying to read data for a remote disk. Default value: `10000` seconds. +- `remote_fs_read_backoff_max_tries` — The maximum number of attempts to read with backoff. Default value: `5`. If a query fails with an exception `DB:Exception Unreachable URL`, then you can try to adjust the settings: [http_connection_timeout](/docs/en/operations/settings/settings.md/#http_connection_timeout), [http_receive_timeout](/docs/en/operations/settings/settings.md/#http_receive_timeout), [keep_alive_timeout](/docs/en/operations/server-configuration-parameters/settings.md/#keep-alive-timeout). diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 8a2f25629f6..4290799b6bc 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -7,11 +7,11 @@ Contains the historical values for `system.asynchronous_metrics`, which are save Columns: -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. -- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. -- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. +- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. +- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. **Example** diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index 551aa771ec9..f357341da67 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -7,9 +7,9 @@ Contains metrics that are calculated periodically in the background. For example Columns: -- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name. -- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. -- `description` ([String](../../sql-reference/data-types/string.md) - Metric description) +- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name. +- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value.
+- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description. **Example** @@ -32,9 +32,592 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 └─────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` +## Metric descriptions + + +### AsynchronousHeavyMetricsCalculationTimeSpent + +Time in seconds spent for calculation of asynchronous heavy (tables related) metrics (this is the overhead of asynchronous metrics). + +### AsynchronousHeavyMetricsUpdateInterval + +Heavy (tables related) metrics update interval. + +### AsynchronousMetricsCalculationTimeSpent + +Time in seconds spent for calculation of asynchronous metrics (this is the overhead of asynchronous metrics). + +### AsynchronousMetricsUpdateInterval + +Metrics update interval. + +### BlockActiveTime_*name* + +Time in seconds the block device had the IO requests queued. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockDiscardBytes_*name* + +Number of discarded bytes on the block device. These operations are relevant for SSD. Discard operations are not used by ClickHouse, but can be used by other processes on the system. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockDiscardMerges_*name* + +Number of discard operations requested from the block device and merged together by the OS IO scheduler. These operations are relevant for SSD. Discard operations are not used by ClickHouse, but can be used by other processes on the system. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockDiscardOps_*name* + +Number of discard operations requested from the block device. These operations are relevant for SSD. Discard operations are not used by ClickHouse, but can be used by other processes on the system. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockDiscardTime_*name* + +Time in seconds spent in discard operations requested from the block device, summed across all the operations. These operations are relevant for SSD. Discard operations are not used by ClickHouse, but can be used by other processes on the system. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockInFlightOps_*name* + +This value counts the number of I/O requests that have been issued to the device driver but have not yet completed. It does not include IO requests that are in the queue but not yet issued to the device driver. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`.
See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockQueueTime_*name* + +This value counts the number of milliseconds that IO requests have waited on this block device. If there are multiple IO requests waiting, this value will increase as the product of the number of milliseconds times the number of requests waiting. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockReadBytes_*name* + +Number of bytes read from the block device. It can be lower than the number of bytes read from the filesystem due to the usage of the OS page cache, which saves IO. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockReadMerges_*name* + +Number of read operations requested from the block device and merged together by the OS IO scheduler. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockReadOps_*name* + +Number of read operations requested from the block device. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockReadTime_*name* + +Time in seconds spent in read operations requested from the block device, summed across all the operations. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockWriteBytes_*name* + +Number of bytes written to the block device. It can be lower than the number of bytes written to the filesystem due to the usage of the OS page cache, which saves IO. A write to the block device may happen later than the corresponding write to the filesystem due to write-back caching. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockWriteMerges_*name* + +Number of write operations requested from the block device and merged together by the OS IO scheduler. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockWriteOps_*name* + +Number of write operations requested from the block device. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### BlockWriteTime_*name* + +Time in seconds spent in write operations requested from the block device, summed across all the operations. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Source: `/sys/block`. See https://www.kernel.org/doc/Documentation/block/stat.txt + +### CPUFrequencyMHz_*name* + +The current frequency of the CPU, in MHz. Most of the modern CPUs adjust the frequency dynamically for power saving and Turbo Boosting.
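+A minimal query sketch for reading these per-core values from the table documented here:
+
+``` sql
+SELECT metric, value FROM system.asynchronous_metrics WHERE metric LIKE 'CPUFrequencyMHz%' ORDER BY metric;
+```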
+ +### CompiledExpressionCacheBytes + +Total bytes used for the cache of JIT-compiled code. + +### CompiledExpressionCacheCount + +Total entries in the cache of JIT-compiled code. + +### DiskAvailable_*name* + +Available bytes on the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB. + +### DiskTotal_*name* + +The total size in bytes of the disk (virtual filesystem). Remote filesystems can show a large value like 16 EiB. + +### DiskUnreserved_*name* + +Available bytes on the disk (virtual filesystem) without the reservations for merges, fetches, and moves. Remote filesystems can show a large value like 16 EiB. + +### DiskUsed_*name* + +Used bytes on the disk (virtual filesystem). Remote filesystems do not always provide this information. + +### FilesystemCacheBytes + +Total bytes in the `cache` virtual filesystem. This cache is held on disk. + +### FilesystemCacheFiles + +Total number of cached file segments in the `cache` virtual filesystem. This cache is held on disk. + +### FilesystemLogsPathAvailableBytes + +Available bytes on the volume where ClickHouse logs path is mounted. If this value approaches zero, you should tune the log rotation in the configuration file. + +### FilesystemLogsPathAvailableINodes + +The number of available inodes on the volume where ClickHouse logs path is mounted. + +### FilesystemLogsPathTotalBytes + +The size of the volume where ClickHouse logs path is mounted, in bytes. It's recommended to have at least 10 GB for logs. + +### FilesystemLogsPathTotalINodes + +The total number of inodes on the volume where ClickHouse logs path is mounted. + +### FilesystemLogsPathUsedBytes + +Used bytes on the volume where ClickHouse logs path is mounted. + +### FilesystemLogsPathUsedINodes + +The number of used inodes on the volume where ClickHouse logs path is mounted. + +### FilesystemMainPathAvailableBytes + +Available bytes on the volume where the main ClickHouse path is mounted. + +### FilesystemMainPathAvailableINodes + +The number of available inodes on the volume where the main ClickHouse path is mounted. If it is close to zero, it indicates a misconfiguration, and you will get 'no space left on device' even when the disk is not full. + +### FilesystemMainPathTotalBytes + +The size of the volume where the main ClickHouse path is mounted, in bytes. + +### FilesystemMainPathTotalINodes + +The total number of inodes on the volume where the main ClickHouse path is mounted. If it is less than 25 million, it indicates a misconfiguration. + +### FilesystemMainPathUsedBytes + +Used bytes on the volume where the main ClickHouse path is mounted. + +### FilesystemMainPathUsedINodes + +The number of used inodes on the volume where the main ClickHouse path is mounted. This value mostly corresponds to the number of files. + +### HTTPThreads + +Number of threads in the server of the HTTP interface (without TLS). + +### InterserverThreads + +Number of threads in the server of the replicas communication protocol (without TLS). + +### Jitter + +The difference between the time the thread for calculation of the asynchronous metrics was scheduled to wake up and the time it was in fact woken up. A proxy-indicator of overall system latency and responsiveness. + +### LoadAverage_*N* + +The whole system load, averaged with exponential smoothing over 1 minute.
The load represents the number of threads across all the processes (the scheduling entities of the OS kernel) that are currently running on a CPU or waiting for IO, or ready to run but not being scheduled at this point of time. This number includes all the processes, not only clickhouse-server. The number can be greater than the number of CPU cores, if the system is overloaded, and many processes are ready to run but waiting for CPU or IO. + +### MMapCacheCells + +The number of files opened with `mmap` (mapped in memory). This is used for queries with the setting `local_filesystem_read_method` set to `mmap`. The files opened with `mmap` are kept in the cache to avoid costly TLB flushes. + +### MarkCacheBytes + +Total size of mark cache in bytes. + +### MarkCacheFiles + +Total number of mark files cached in the mark cache. + +### MaxPartCountForPartition + +Maximum number of parts per partition across all partitions of all tables of MergeTree family. Values larger than 300 indicate misconfiguration, overload, or massive data loading. + +### MemoryCode + +The amount of virtual memory mapped for the pages of machine code of the server process, in bytes. + +### MemoryDataAndStack + +The amount of virtual memory mapped for the use of stack and for the allocated memory, in bytes. It is unspecified whether it includes the per-thread stacks and most of the allocated memory that is allocated with the 'mmap' system call. This metric exists only for completeness reasons. We recommend using the `MemoryResident` metric for monitoring. + +### MemoryResident + +The amount of physical memory used by the server process, in bytes. + +### MemoryShared + +The amount of memory used by the server process that is also shared by other processes, in bytes. ClickHouse does not use shared memory, but some memory can be labeled by the OS as shared for its own reasons. This metric does not make a lot of sense to watch, and it exists only for completeness reasons. + +### MemoryVirtual + +The size of the virtual address space allocated by the server process, in bytes. The size of the virtual address space is usually much greater than the physical memory consumption, and should not be used as an estimate for the memory consumption. Large values of this metric are completely normal and make only technical sense. + +### MySQLThreads + +Number of threads in the server of the MySQL compatibility protocol. + +### NetworkReceiveBytes_*name* + +Number of bytes received via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### NetworkReceiveDrop_*name* + +Number of packets dropped while being received via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### NetworkReceiveErrors_*name* + +Number of times an error happened while receiving via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### NetworkReceivePackets_*name* + +Number of network packets received via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### NetworkSendBytes_*name* + +Number of bytes sent via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server.
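+A hedged sketch of watching the per-interface network counters listed above and below:
+
+``` sql
+SELECT metric, value FROM system.asynchronous_metrics WHERE metric LIKE 'NetworkReceive%' OR metric LIKE 'NetworkSend%' ORDER BY metric;
+```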
+ +### NetworkSendDrop_*name* + +Number of times a packet was dropped while sending via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### NetworkSendErrors_*name* + +Number of times an error (e.g. TCP retransmit) happened while sending via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### NetworkSendPackets_*name* + +Number of network packets sent via the network interface. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### NumberOfDatabases + +Total number of databases on the server. + +### NumberOfDetachedByUserParts + +The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed. + +### NumberOfDetachedParts + +The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself if the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed. + +### NumberOfTables + +Total number of tables summed across the databases on the server, excluding the databases that cannot contain MergeTree tables. The excluded database engines are those that generate the set of tables on the fly, like `Lazy`, `MySQL`, `PostgreSQL`, `SQLite`. + +### OSContextSwitches + +The number of context switches that the system underwent on the host machine. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSGuestNiceTime + +The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel, when a guest was set to a higher priority (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This metric is irrelevant for ClickHouse, but still exists for completeness. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSGuestNiceTimeCPU_*N* + +The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel, when a guest was set to a higher priority (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This metric is irrelevant for ClickHouse, but still exists for completeness. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSGuestNiceTimeNormalized + +The value is similar to `OSGuestNiceTime` but divided by the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSGuestTime + +The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server.
This metric is irrelevant for ClickHouse, but still exists for completeness. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSGuestTimeCPU_*N* + +The ratio of time spent running a virtual CPU for guest operating systems under the control of the Linux kernel (See `man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This metric is irrelevant for ClickHouse, but still exists for completeness. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSGuestTimeNormalized + +The value is similar to `OSGuestTime` but divided by the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSIOWaitTime + +The ratio of time the CPU core was not running code, but the OS kernel did not run any other process on this CPU because the processes were waiting for IO. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSIOWaitTimeCPU_*N* + +The ratio of time the CPU core was not running code, but the OS kernel did not run any other process on this CPU because the processes were waiting for IO. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSIOWaitTimeNormalized + +The value is similar to `OSIOWaitTime` but divided by the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSIdleTime + +The ratio of time the CPU core was idle (not even ready to run a process waiting for IO) from the OS kernel standpoint. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This does not include the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core). The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSIdleTimeCPU_*N* + +The ratio of time the CPU core was idle (not even ready to run a process waiting for IO) from the OS kernel standpoint. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This does not include the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core). The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores].
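+For instance, a rough per-core idle check against this table might look like:
+
+``` sql
+SELECT metric, round(value, 3) AS idle_ratio FROM system.asynchronous_metrics WHERE metric LIKE 'OSIdleTimeCPU_%' ORDER BY metric;
+```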
+ +### OSIdleTimeNormalized + +The value is similar to `OSIdleTime` but divided by the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSInterrupts + +The number of interrupts on the host machine. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSIrqTime + +The ratio of time spent for running hardware interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. A high value of this metric may indicate hardware misconfiguration or a very high network load. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSIrqTimeCPU_*N* + +The ratio of time spent for running hardware interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. A high value of this metric may indicate hardware misconfiguration or a very high network load. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSIrqTimeNormalized + +The value is similar to `OSIrqTime` but divided by the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSMemoryAvailable + +The amount of memory available to be used by programs, in bytes. This is very similar to the `OSMemoryFreePlusCached` metric. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSMemoryBuffers + +The amount of memory used by OS kernel buffers, in bytes. This should be typically small, and large values may indicate a misconfiguration of the OS. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSMemoryCached + +The amount of memory used by the OS page cache, in bytes. Typically, almost all available memory is used by the OS page cache - high values of this metric are normal and expected. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSMemoryFreePlusCached + +The amount of free memory plus OS page cache memory on the host system, in bytes. This memory is available to be used by programs. The value should be very similar to `OSMemoryAvailable`. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSMemoryFreeWithoutCached + +The amount of free memory on the host system, in bytes. This does not include the memory used by the OS page cache. The page cache memory is also available for usage by programs, so the value of this metric can be confusing. See the `OSMemoryAvailable` metric instead. For convenience, we also provide the `OSMemoryFreePlusCached` metric, which should be somewhat similar to `OSMemoryAvailable`. See also https://www.linuxatemyram.com/.
This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSMemoryTotal + +The total amount of memory on the host system, in bytes. + +### OSNiceTime + +The ratio of time the CPU core was running userspace code with higher priority. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSNiceTimeCPU_*N* + +The ratio of time the CPU core was running userspace code with higher priority. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSNiceTimeNormalized + +The value is similar to `OSNiceTime` but divided by the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSOpenFiles + +The total number of opened files on the host machine. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSProcessesBlocked + +Number of threads blocked waiting for I/O to complete (`man procfs`). This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSProcessesCreated + +The number of processes created. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSProcessesRunning + +The number of runnable (running or ready to run) threads as seen by the operating system. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. + +### OSSoftIrqTime + +The ratio of time spent for running software interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. A high value of this metric may indicate inefficient software running on the system. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSSoftIrqTimeCPU_*N* + +The ratio of time spent for running software interrupt requests on the CPU. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. A high value of this metric may indicate inefficient software running on the system. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSSoftIrqTimeNormalized + +The value is similar to `OSSoftIrqTime` but divided by the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSStealTime + +The ratio of time spent in other operating systems by the CPU when running in a virtualized environment. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server.
Not every virtualized environment presents this metric, and most of them don't. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSStealTimeCPU_*N* + +The ratio of time spent in other operating systems by the CPU when running in a virtualized environment. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. Not every virtualized environment presents this metric, and most of them don't. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSStealTimeNormalized + +The value is similar to `OSStealTime` but divided by the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSSystemTime + +The ratio of time the CPU core was running OS kernel (system) code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSSystemTimeCPU_*N* + +The ratio of time the CPU core was running OS kernel (system) code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSSystemTimeNormalized + +The value is similar to `OSSystemTime` but divided by the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### OSThreadsRunnable + +The total number of 'runnable' threads, as seen by the OS kernel scheduler. + +### OSThreadsTotal + +The total number of threads, as seen by the OS kernel scheduler. + +### OSUptime + +The uptime of the host server (the machine where ClickHouse is running), in seconds. + +### OSUserTime + +The ratio of time the CPU core was running userspace code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This also includes the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core). The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores]. + +### OSUserTimeCPU_*N* + +The ratio of time the CPU core was running userspace code. This is a system-wide metric, it includes all the processes on the host machine, not just clickhouse-server. This also includes the time when the CPU was under-utilized due to the reasons internal to the CPU (memory loads, pipeline stalls, branch mispredictions, running another SMT core). The value for a single CPU core will be in the interval [0..1]. The value for all CPU cores is calculated as a sum across them [0..num cores].
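+As a sketch, the normalized CPU metrics described above can be compared directly across servers, e.g.:
+
+``` sql
+SELECT metric, value FROM system.asynchronous_metrics WHERE metric IN ('OSUserTimeNormalized', 'OSSystemTimeNormalized', 'OSIOWaitTimeNormalized');
+```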
+ +### OSUserTimeNormalized + +The value is similar to `OSUserTime` but divided by the number of CPU cores to be measured in the [0..1] interval regardless of the number of cores. This allows you to average the values of this metric across multiple servers in a cluster even if the number of cores is non-uniform, and still get the average resource utilization metric. + +### PostgreSQLThreads + +Number of threads in the server of the PostgreSQL compatibility protocol. + +### ReplicasMaxAbsoluteDelay + +Maximum difference in seconds between the freshest replicated part and the freshest data part still to be replicated, across Replicated tables. A very high value indicates a replica with no data. + +### ReplicasMaxInsertsInQueue + +Maximum number of INSERT operations in the queue (still to be replicated) across Replicated tables. + +### ReplicasMaxMergesInQueue + +Maximum number of merge operations in the queue (still to be applied) across Replicated tables. + +### ReplicasMaxQueueSize + +Maximum queue size (in the number of operations like get, merge) across Replicated tables. + +### ReplicasMaxRelativeDelay + +Maximum difference between the replica delay and the delay of the most up-to-date replica of the same table, across Replicated tables. + +### ReplicasSumInsertsInQueue + +Sum of INSERT operations in the queue (still to be replicated) across Replicated tables. + +### ReplicasSumMergesInQueue + +Sum of merge operations in the queue (still to be applied) across Replicated tables. + +### ReplicasSumQueueSize + +Total queue size (in the number of operations like get, merge) across Replicated tables. + +### TCPThreads + +Number of threads in the server of the TCP protocol (without TLS). + +### Temperature_*N* + +The temperature of the corresponding device in ℃. A sensor can return an unrealistic value. Source: `/sys/class/thermal` + +### Temperature_*name* + +The temperature reported by the corresponding hardware monitor and the corresponding sensor in ℃. A sensor can return an unrealistic value. Source: `/sys/class/hwmon` + +### TotalBytesOfMergeTreeTables + +Total amount of bytes (compressed, including data and indices) stored in all tables of MergeTree family. + +### TotalPartsOfMergeTreeTables + +Total amount of data parts in all tables of MergeTree family. Numbers larger than 10 000 will negatively affect the server startup time and may indicate an unreasonable choice of the partition key. + +### TotalRowsOfMergeTreeTables + +Total amount of rows (records) stored in all tables of MergeTree family. + +### UncompressedCacheBytes + +Total size of uncompressed cache in bytes. Uncompressed cache does not usually improve the performance and should be mostly avoided. + +### UncompressedCacheCells + +Total number of entries in the uncompressed cache. Each entry represents a decompressed block of data. Uncompressed cache does not usually improve performance and should be mostly avoided. + +### Uptime + +The server uptime in seconds. It includes the time spent for server initialization before accepting connections. + +### jemalloc.active + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.allocated + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.arenas.all.dirty_purged + +An internal metric of the low-level memory allocator (jemalloc).
See https://jemalloc.net/jemalloc.3.html + +### jemalloc.arenas.all.muzzy_purged + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.arenas.all.pactive + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.arenas.all.pdirty + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.arenas.all.pmuzzy + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.background_thread.num_runs + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.background_thread.num_threads + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.background_thread.run_intervals + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.epoch + +An internal incremental update number of the statistics of jemalloc (Jason Evans' memory allocator), used in all other `jemalloc` metrics. + +### jemalloc.mapped + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.metadata + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.metadata_thp + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.resident + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + +### jemalloc.retained + +An internal metric of the low-level memory allocator (jemalloc). See https://jemalloc.net/jemalloc.3.html + **See Also** -- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. -- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. -- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred. -- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`. +- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. +- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. +- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred. +- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`. diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index 2c5e2699b4f..4b1e75c25a1 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -7,19 +7,19 @@ Contains information about clusters available in the config file and the servers Columns: -- `cluster` ([String](../../sql-reference/data-types/string.md)) — The cluster name. 
-- `shard_num` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The shard number in the cluster, starting from 1. -- `shard_weight` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The relative weight of the shard when writing data. -- `replica_num` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The replica number in the shard, starting from 1. -- `host_name` ([String](../../sql-reference/data-types/string.md)) — The host name, as specified in the config. -- `host_address` ([String](../../sql-reference/data-types/string.md)) — The host IP address obtained from DNS. -- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The port to use for connecting to the server. -- `is_local` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the host is local. -- `user` ([String](../../sql-reference/data-types/string.md)) — The name of the user for connecting to the server. -- `default_database` ([String](../../sql-reference/data-types/string.md)) — The default database name. -- `errors_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of times this host failed to reach replica. -- `slowdowns_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of slowdowns that led to changing replica when establishing a connection with hedged requests. -- `estimated_recovery_time` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Seconds remaining until the replica error count is zeroed and it is considered to be back to normal. +- `cluster` ([String](../../sql-reference/data-types/string.md)) — The cluster name. +- `shard_num` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The shard number in the cluster, starting from 1. +- `shard_weight` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The relative weight of the shard when writing data. +- `replica_num` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The replica number in the shard, starting from 1. +- `host_name` ([String](../../sql-reference/data-types/string.md)) — The host name, as specified in the config. +- `host_address` ([String](../../sql-reference/data-types/string.md)) — The host IP address obtained from DNS. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The port to use for connecting to the server. +- `is_local` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the host is local. +- `user` ([String](../../sql-reference/data-types/string.md)) — The name of the user for connecting to the server. +- `default_database` ([String](../../sql-reference/data-types/string.md)) — The default database name. +- `errors_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of times this host failed to reach a replica. +- `slowdowns_count` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of slowdowns that led to changing the replica when establishing a connection with hedged requests. +- `estimated_recovery_time` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Seconds remaining until the replica error count is zeroed and it is considered to be back to normal.
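+
+A minimal query sketch (an illustration using the columns above) for spotting replicas that are accumulating connection errors:
+
+``` sql
+-- Replicas with recorded errors, worst first.
+SELECT cluster, shard_num, replica_num, host_name, errors_count, estimated_recovery_time
+FROM system.clusters
+WHERE errors_count > 0
+ORDER BY errors_count DESC;
+```
+
+An empty result means no connection errors have been recorded since the error counters were last zeroed.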
**Example** @@ -67,6 +67,6 @@ estimated_recovery_time: 0 **See Also** -- [Table engine Distributed](../../engines/table-engines/special/distributed.md) -- [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap) -- [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) +- [Table engine Distributed](../../engines/table-engines/special/distributed.md) +- [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap) +- [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md index bdb35e24e37..ccdc2d8c742 100644 --- a/docs/en/operations/system-tables/columns.md +++ b/docs/en/operations/system-tables/columns.md @@ -11,27 +11,27 @@ Columns from [temporary tables](../../sql-reference/statements/create/table.md#t The `system.columns` table contains the following columns (the column type is shown in brackets): -- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. -- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `name` ([String](../../sql-reference/data-types/string.md)) — Column name. -- `type` ([String](../../sql-reference/data-types/string.md)) — Column type. -- `position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1. -- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined. -- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined. -- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes. -- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of decompressed data, in bytes. -- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of marks, in bytes. -- `comment` ([String](../../sql-reference/data-types/string.md)) — Comment on the column, or an empty string if it is not defined. -- `is_in_partition_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the partition expression. -- `is_in_sorting_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sorting key expression. -- `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the primary key expression. -- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression. -- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name. -- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned. 
-- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned. -- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned. -- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned. -- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned. +- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `name` ([String](../../sql-reference/data-types/string.md)) — Column name. +- `type` ([String](../../sql-reference/data-types/string.md)) — Column type. +- `position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1. +- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined. +- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined. +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes. +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of decompressed data, in bytes. +- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of marks, in bytes. +- `comment` ([String](../../sql-reference/data-types/string.md)) — Comment on the column, or an empty string if it is not defined. +- `is_in_partition_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the partition expression. +- `is_in_sorting_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sorting key expression. +- `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the primary key expression. +- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression. +- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name. +- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the `FixedString` data type.
Otherwise, the `NULL` value is returned. +- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is the bit width for integer types and the decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned. +- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system for the precision of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned. +- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned. +- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned. **Example** diff --git a/docs/en/operations/system-tables/contributors.md b/docs/en/operations/system-tables/contributors.md index b871bb20f2e..f00329e2dbe 100644 --- a/docs/en/operations/system-tables/contributors.md +++ b/docs/en/operations/system-tables/contributors.md @@ -7,7 +7,7 @@ Contains information about contributors. The order is random at query execution Columns: -- `name` (String) — Contributor (author) name from git log. +- `name` (String) — Contributor (author) name from git log. **Example** diff --git a/docs/en/operations/system-tables/crash-log.md b/docs/en/operations/system-tables/crash-log.md index a44b0db8e9b..4d015a513a2 100644 --- a/docs/en/operations/system-tables/crash-log.md +++ b/docs/en/operations/system-tables/crash-log.md @@ -7,17 +7,17 @@ Contains information about stack traces for fatal errors. The table does not exi Columns: -- `event_date` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date of the event. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the event. -- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the event with nanoseconds. -- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Signal number. -- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread ID. -- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query ID. -- `trace` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Stack trace at the moment of crash. Each element is a virtual memory address inside ClickHouse server process. -- `trace_full` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Stack trace at the moment of crash. Each element contains a called method inside ClickHouse server process. -- `version` ([String](../../sql-reference/data-types/string.md)) — ClickHouse server version. -- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server revision. -- `build_id` ([String](../../sql-reference/data-types/string.md)) — BuildID that is generated by compiler.
+- `event_date` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date of the event. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the event. +- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the event with nanosecond precision. +- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Signal number. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread ID. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query ID. +- `trace` ([Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Stack trace at the moment of crash. Each element is a virtual memory address inside ClickHouse server process. +- `trace_full` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Stack trace at the moment of crash. Each element contains a called method inside ClickHouse server process. +- `version` ([String](../../sql-reference/data-types/string.md)) — ClickHouse server version. +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server revision. +- `build_id` ([String](../../sql-reference/data-types/string.md)) — BuildID that is generated by the compiler. **Example** @@ -46,6 +46,6 @@ build_id: ``` **See also** -- [trace_log](../../operations/system-tables/trace_log.md) system table +- [trace_log](../../operations/system-tables/trace_log.md) system table [Original article](https://clickhouse.com/docs/en/operations/system-tables/crash-log) diff --git a/docs/en/operations/system-tables/data_skipping_indices.md b/docs/en/operations/system-tables/data_skipping_indices.md index f1e233b33f7..188d94c50da 100644 --- a/docs/en/operations/system-tables/data_skipping_indices.md +++ b/docs/en/operations/system-tables/data_skipping_indices.md @@ -7,16 +7,16 @@ Contains information about existing data skipping indices in all the tables. Columns: -- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. -- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `name` ([String](../../sql-reference/data-types/string.md)) — Index name. -- `type` ([String](../../sql-reference/data-types/string.md)) — Index type. -- `type_full` ([String](../../sql-reference/data-types/string.md)) — Index type expression from create statement. -- `expr` ([String](../../sql-reference/data-types/string.md)) — Expression for the index calculation. -- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of granules in the block. -- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes. -- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of decompressed data, in bytes. -- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of marks, in bytes. +- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `name` ([String](../../sql-reference/data-types/string.md)) — Index name. +- `type` ([String](../../sql-reference/data-types/string.md)) — Index type. +- `type_full` ([String](../../sql-reference/data-types/string.md)) — Index type expression from the CREATE statement. +- `expr` ([String](../../sql-reference/data-types/string.md)) — Expression for the index calculation.
+- `granularity` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of granules in the block. +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of compressed data, in bytes. +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of decompressed data, in bytes. +- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of marks, in bytes. **Example** diff --git a/docs/en/operations/system-tables/data_type_families.md b/docs/en/operations/system-tables/data_type_families.md index f914d5545d3..1392e977f60 100644 --- a/docs/en/operations/system-tables/data_type_families.md +++ b/docs/en/operations/system-tables/data_type_families.md @@ -7,9 +7,9 @@ Contains information about supported [data types](../../sql-reference/data-types Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — Data type name. -- `case_insensitive` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Property that shows whether you can use a data type name in a query in case insensitive manner or not. For example, `Date` and `date` are both valid. -- `alias_to` ([String](../../sql-reference/data-types/string.md)) — Data type name for which `name` is an alias. +- `name` ([String](../../sql-reference/data-types/string.md)) — Data type name. +- `case_insensitive` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Property that shows whether you can use a data type name in a query in a case-insensitive manner or not. For example, `Date` and `date` are both valid. +- `alias_to` ([String](../../sql-reference/data-types/string.md)) — Data type name for which `name` is an alias. **Example** @@ -34,4 +34,4 @@ SELECT * FROM system.data_type_families WHERE alias_to = 'String' **See Also** -- [Syntax](../../sql-reference/syntax.md) — Information about supported syntax. +- [Syntax](../../sql-reference/syntax.md) — Information about supported syntax. diff --git a/docs/en/operations/system-tables/databases.md b/docs/en/operations/system-tables/databases.md index 8f0cc6e56d2..f3d3d388c36 100644 --- a/docs/en/operations/system-tables/databases.md +++ b/docs/en/operations/system-tables/databases.md @@ -7,13 +7,13 @@ Contains information about the databases that are available to the current user. Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — Database name. -- `engine` ([String](../../sql-reference/data-types/string.md)) — [Database engine](../../engines/database-engines/index.md). -- `data_path` ([String](../../sql-reference/data-types/string.md)) — Data path. -- `metadata_path` ([String](../../sql-reference/data-types/enum.md)) — Metadata path. -- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Database UUID. -- `comment` ([String](../../sql-reference/data-types/enum.md)) — Database comment. +- `name` ([String](../../sql-reference/data-types/string.md)) — Database name. +- `engine` ([String](../../sql-reference/data-types/string.md)) — [Database engine](../../engines/database-engines/index.md). +- `data_path` ([String](../../sql-reference/data-types/string.md)) — Data path. +- `metadata_path` ([String](../../sql-reference/data-types/enum.md)) — Metadata path. +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Database UUID. +- `comment` ([String](../../sql-reference/data-types/enum.md)) — Database comment.
+- `engine_full` ([String](../../sql-reference/data-types/enum.md)) — Parameters of the database engine. The `name` column from this system table is used for implementing the `SHOW DATABASES` query. diff --git a/docs/en/operations/system-tables/dictionaries.md b/docs/en/operations/system-tables/dictionaries.md index ca6b7faaa78..8632581144c 100644 --- a/docs/en/operations/system-tables/dictionaries.md +++ b/docs/en/operations/system-tables/dictionaries.md @@ -7,36 +7,36 @@ Contains information about [dictionaries](../../sql-reference/dictionaries/index Columns: -- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database containing the dictionary created by DDL query. Empty string for other dictionaries. -- `name` ([String](../../sql-reference/data-types/string.md)) — [Dictionary name](../../sql-reference/dictionaries/index.md). -- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Dictionary UUID. -- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Dictionary status. Possible values: - - `NOT_LOADED` — Dictionary was not loaded because it was not used. - - `LOADED` — Dictionary loaded successfully. - - `FAILED` — Unable to load the dictionary as a result of an error. - - `LOADING` — Dictionary is loading now. - - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../../sql-reference/statements/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed). - - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error and is loading now. -- `origin` ([String](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary. -- `type` ([String](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation. [Storing Dictionaries in Memory](../../sql-reference/dictionaries/index.md#storig-dictionaries-in-memory). -- `key.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [key names](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-key) provided by the dictionary. -- `key.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [key types](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-key) provided by the dictionary. -- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [attribute names](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-attributes) provided by the dictionary. -- `attribute.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [attribute types](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-attributes) provided by the dictionary. -- `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. -- `query_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot. 
-- `hit_rate` ([Float64](../../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache. -- `found_rate` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of uses for which the value was found. -- `element_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary. -- `load_factor` ([Float64](../../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). -- `source` ([String](../../sql-reference/data-types/string.md)) — Text describing the [data source](../../sql-reference/dictionaries/index.md#dictionary-sources) for the dictionary. -- `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../../sql-reference/dictionaries/index.md#dictionary-updates) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. -- `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../../sql-reference/dictionaries/index.md#dictionary-updates) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. -- `loading_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary. -- `last_successful_update_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with dictionary sources and investigate the causes. -- `loading_duration` ([Float32](../../sql-reference/data-types/float.md)) — Duration of a dictionary loading. -- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn’t be created. -- `comment` ([String](../../sql-reference/data-types/string.md)) — Text of the comment to dictionary. +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database containing the dictionary created by a DDL query. Empty string for other dictionaries. +- `name` ([String](../../sql-reference/data-types/string.md)) — [Dictionary name](../../sql-reference/dictionaries/index.md). +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Dictionary UUID. +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Dictionary status. Possible values: + - `NOT_LOADED` — Dictionary was not loaded because it was not used. + - `LOADED` — Dictionary loaded successfully. + - `FAILED` — Unable to load the dictionary as a result of an error. + - `LOADING` — Dictionary is loading now. + - `LOADED_AND_RELOADING` — Dictionary is loaded successfully, and is being reloaded right now (frequent reasons: [SYSTEM RELOAD DICTIONARY](../../sql-reference/statements/system.md#query_language-system-reload-dictionary) query, timeout, dictionary config has changed). + - `FAILED_AND_RELOADING` — Could not load the dictionary as a result of an error, and it is loading now. +- `origin` ([String](../../sql-reference/data-types/string.md)) — Path to the configuration file that describes the dictionary. +- `type` ([String](../../sql-reference/data-types/string.md)) — Type of a dictionary allocation.
[Storing Dictionaries in Memory](../../sql-reference/dictionaries/index.md#storig-dictionaries-in-memory). +- `key.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [key names](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-key) provided by the dictionary. +- `key.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [key types](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-key) provided by the dictionary. +- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of [attribute names](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-attributes) provided by the dictionary. +- `attribute.types` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Corresponding array of [attribute types](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-attributes) provided by the dictionary. +- `bytes_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Amount of RAM allocated for the dictionary. +- `query_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of queries since the dictionary was loaded or since the last successful reboot. +- `hit_rate` ([Float64](../../sql-reference/data-types/float.md)) — For cache dictionaries, the percentage of uses for which the value was in the cache. +- `found_rate` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of uses for which the value was found. +- `element_count` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of items stored in the dictionary. +- `load_factor` ([Float64](../../sql-reference/data-types/float.md)) — Percentage filled in the dictionary (for a hashed dictionary, the percentage filled in the hash table). +- `source` ([String](../../sql-reference/data-types/string.md)) — Text describing the [data source](../../sql-reference/dictionaries/index.md#dictionary-sources) for the dictionary. +- `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../../sql-reference/dictionaries/index.md#dictionary-updates) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. +- `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../../sql-reference/dictionaries/index.md#dictionary-updates) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds. +- `loading_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary. +- `last_successful_update_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor problems with dictionary sources and investigate the causes. +- `loading_duration` ([Float32](../../sql-reference/data-types/float.md)) — Duration of dictionary loading.
+- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn’t be created. +- `comment` ([String](../../sql-reference/data-types/string.md)) — Text of the comment to the dictionary. **Example** diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md index a079f3338d2..ed67e2a2416 100644 --- a/docs/en/operations/system-tables/disks.md +++ b/docs/en/operations/system-tables/disks.md @@ -7,12 +7,12 @@ Contains information about disks defined in the [server configuration](../../eng Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — Name of a disk in the server configuration. -- `path` ([String](../../sql-reference/data-types/string.md)) — Path to the mount point in the file system. -- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes. -- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. -- `unreserved_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space which is not taken by reservations (`free_space` minus the size of reservations taken by merges, inserts, and other disk write operations currently running). -- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. +- `name` ([String](../../sql-reference/data-types/string.md)) — Name of a disk in the server configuration. +- `path` ([String](../../sql-reference/data-types/string.md)) — Path to the mount point in the file system. +- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes. +- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. +- `unreserved_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space which is not taken by reservations (`free_space` minus the size of reservations taken by merges, inserts, and other disk write operations currently running). +- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. **Example** diff --git a/docs/en/operations/system-tables/distributed_ddl_queue.md b/docs/en/operations/system-tables/distributed_ddl_queue.md index acc68372a4c..8cccf946621 100644 --- a/docs/en/operations/system-tables/distributed_ddl_queue.md +++ b/docs/en/operations/system-tables/distributed_ddl_queue.md @@ -7,18 +7,18 @@ Contains information about [distributed ddl queries (ON CLUSTER clause)](../../s Columns: -- `entry` ([String](../../sql-reference/data-types/string.md)) — Query id. -- `host_name` ([String](../../sql-reference/data-types/string.md)) — Hostname. -- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP address that the Hostname resolves to. -- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host Port. -- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the query. -- `cluster` ([String](../../sql-reference/data-types/string.md)) — Cluster name. -- `query` ([String](../../sql-reference/data-types/string.md)) — Query executed. -- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query.
-- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time. -- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time. -- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution (in milliseconds). -- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ClickHouse Keeper](../../operations/tips.md#zookeeper). +- `entry` ([String](../../sql-reference/data-types/string.md)) — Query id. +- `host_name` ([String](../../sql-reference/data-types/string.md)) — Hostname. +- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP address that the hostname resolves to. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host port. +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the query. +- `cluster` ([String](../../sql-reference/data-types/string.md)) — Cluster name. +- `query` ([String](../../sql-reference/data-types/string.md)) — Query executed. +- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query. +- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time. +- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution (in milliseconds). +- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ClickHouse Keeper](../../operations/tips.md#zookeeper). **Example** diff --git a/docs/en/operations/system-tables/distribution_queue.md b/docs/en/operations/system-tables/distribution_queue.md index 8f461590ee1..0dee805c022 100644 --- a/docs/en/operations/system-tables/distribution_queue.md +++ b/docs/en/operations/system-tables/distribution_queue.md @@ -7,25 +7,25 @@ Contains information about local files that are in the queue to be sent to the s Columns: -- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database. +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database. -- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table. +- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table. -- `data_path` ([String](../../sql-reference/data-types/string.md)) — Path to the folder with local files. +- `data_path` ([String](../../sql-reference/data-types/string.md)) — Path to the folder with local files. -- `is_blocked` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag indicates whether sending local files to the server is blocked. +- `is_blocked` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether sending local files to the server is blocked. -- `error_count` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of errors. +- `error_count` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of errors. -- `data_files` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of local files in a folder. +- `data_files` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of local files in a folder. -- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of compressed data in local files, in bytes.
+- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of compressed data in local files, in bytes. -- `broken_data_files` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of files that has been marked as broken (due to an error). +- `broken_data_files` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of files that have been marked as broken (due to an error). -- `broken_data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of compressed data in broken files, in bytes. +- `broken_data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of compressed data in broken files, in bytes. -- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text message about the last error that occurred (if any). +- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text message about the last error that occurred (if any). **Example** @@ -48,4 +48,4 @@ last_exception: **See Also** -- [Distributed table engine](../../engines/table-engines/special/distributed.md) +- [Distributed table engine](../../engines/table-engines/special/distributed.md) diff --git a/docs/en/operations/system-tables/dropped_tables.md b/docs/en/operations/system-tables/dropped_tables.md index cb6cec0035a..144c03109ac 100644 --- a/docs/en/operations/system-tables/dropped_tables.md +++ b/docs/en/operations/system-tables/dropped_tables.md @@ -7,13 +7,13 @@ Contains information about tables that drop table has been executed but data cle Columns: -- `index` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Index in marked_dropped_tables queue. -- `database` ([String](../../sql-reference/data-types/string.md)) — Database. -- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid. -- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name. -- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of table's metadata file in metadate_dropped directory. -- `table_dropped_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the next attempt to remove table's data is scheduled on. Usually it's the table when the table was dropped plus `database_atomic_delay_before_drop_table_sec` +- `index` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Index in marked_dropped_tables queue. +- `database` ([String](../../sql-reference/data-types/string.md)) — Database. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid. +- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name. +- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of the table's metadata file in the metadata_dropped directory. +- `table_dropped_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the next attempt to remove the table's data is scheduled. Usually it's the time when the table was dropped plus `database_atomic_delay_before_drop_table_sec`. **Example** diff --git a/docs/en/operations/system-tables/errors.md b/docs/en/operations/system-tables/errors.md index 48437f43b7d..01762962152 100644 --- a/docs/en/operations/system-tables/errors.md +++ b/docs/en/operations/system-tables/errors.md @@ -7,13 +7,13 @@ Contains error codes with the number of times they have been triggered.
Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — name of the error (`errorCodeToName`). -- `code` ([Int32](../../sql-reference/data-types/int-uint.md)) — code number of the error. -- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — the number of times this error has been happened. -- `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — time when the last error happened. -- `last_error_message` ([String](../../sql-reference/data-types/string.md)) — message for the last error. -- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored. -- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — remote exception (i.e. received during one of the distributed query). +- `name` ([String](../../sql-reference/data-types/string.md)) — name of the error (`errorCodeToName`). +- `code` ([Int32](../../sql-reference/data-types/int-uint.md)) — code number of the error. +- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — the number of times this error has happened. +- `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — time when the last error happened. +- `last_error_message` ([String](../../sql-reference/data-types/string.md)) — message for the last error. +- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored. +- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — remote exception (i.e. received during one of the distributed queries). **Example** diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md index 68217a6daaf..ba5602ee292 100644 --- a/docs/en/operations/system-tables/events.md +++ b/docs/en/operations/system-tables/events.md @@ -7,9 +7,9 @@ Contains information about the number of events that have occurred in the system Columns: -- `event` ([String](../../sql-reference/data-types/string.md)) — Event name. -- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred. -- `description` ([String](../../sql-reference/data-types/string.md)) — Event description. +- `event` ([String](../../sql-reference/data-types/string.md)) — Event name. +- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events that occurred. +- `description` ([String](../../sql-reference/data-types/string.md)) — Event description. **Example** @@ -29,7 +29,7 @@ SELECT * FROM system.events LIMIT 5 **See Also** -- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics. -- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. -- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. -- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. +- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
+- [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. +- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`. +- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. diff --git a/docs/en/operations/system-tables/functions.md b/docs/en/operations/system-tables/functions.md index cb4d8e938eb..60bfa08975b 100644 --- a/docs/en/operations/system-tables/functions.md +++ b/docs/en/operations/system-tables/functions.md @@ -7,8 +7,8 @@ Contains information about normal and aggregate functions. Columns: -- `name`(`String`) – The name of the function. -- `is_aggregate`(`UInt8`) — Whether the function is aggregate. +- `name`(`String`) – The name of the function. +- `is_aggregate`(`UInt8`) — Whether the function is aggregate. **Example** diff --git a/docs/en/operations/system-tables/grants.md b/docs/en/operations/system-tables/grants.md index 7f41f5f07dd..b12f656cb75 100644 --- a/docs/en/operations/system-tables/grants.md +++ b/docs/en/operations/system-tables/grants.md @@ -6,20 +6,20 @@ slug: /en/operations/system-tables/grants Privileges granted to ClickHouse user accounts. Columns: -- `user_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — User name. +- `user_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — User name. -- `role_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Role assigned to user account. +- `role_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Role assigned to user account. -- `access_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Access parameters for ClickHouse user account. +- `access_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Access parameters for ClickHouse user account. -- `database` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a database. +- `database` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a database. -- `table` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a table. +- `table` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a table. -- `column` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a column to which access is granted. +- `column` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Name of a column to which access is granted. -- `is_partial_revoke` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows whether some privileges have been revoked. Possible values: +- `is_partial_revoke` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Logical value. It shows whether some privileges have been revoked. Possible values: - `0` — The row describes a partial revoke. - `1` — The row describes a grant.
-- `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Permission is granted `WITH GRANT OPTION`, see [GRANT](../../sql-reference/statements/grant.md#grant-privigele-syntax). +- `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Permission is granted `WITH GRANT OPTION`, see [GRANT](../../sql-reference/statements/grant.md#grant-privigele-syntax). diff --git a/docs/en/operations/system-tables/graphite_retentions.md b/docs/en/operations/system-tables/graphite_retentions.md index 230fa4cede9..19a751b6ef2 100644 --- a/docs/en/operations/system-tables/graphite_retentions.md +++ b/docs/en/operations/system-tables/graphite_retentions.md @@ -7,12 +7,12 @@ Contains information about parameters [graphite_rollup](../../operations/server- Columns: -- `config_name` (String) - `graphite_rollup` parameter name. -- `regexp` (String) - A pattern for the metric name. -- `function` (String) - The name of the aggregating function. -- `age` (UInt64) - The minimum age of the data in seconds. -- `precision` (UInt64) - How precisely to define the age of the data in seconds. -- `priority` (UInt16) - Pattern priority. -- `is_default` (UInt8) - Whether the pattern is the default. -- `Tables.database` (Array(String)) - Array of names of database tables that use the `config_name` parameter. -- `Tables.table` (Array(String)) - Array of table names that use the `config_name` parameter. +- `config_name` (String) - `graphite_rollup` parameter name. +- `regexp` (String) - A pattern for the metric name. +- `function` (String) - The name of the aggregating function. +- `age` (UInt64) - The minimum age of the data in seconds. +- `precision` (UInt64) - How precisely to define the age of the data in seconds. +- `priority` (UInt16) - Pattern priority. +- `is_default` (UInt8) - Whether the pattern is the default. +- `Tables.database` (Array(String)) - Array of names of database tables that use the `config_name` parameter. +- `Tables.table` (Array(String)) - Array of table names that use the `config_name` parameter. diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md index 284ba866cc8..508419783ef 100644 --- a/docs/en/operations/system-tables/index.md +++ b/docs/en/operations/system-tables/index.md @@ -11,14 +11,14 @@ pagination_next: 'en/operations/system-tables/asynchronous_metric_log' System tables provide information about: -- Server states, processes, and environment. -- Server’s internal processes. +- Server states, processes, and environment. +- Server’s internal processes. System tables: -- Located in the `system` database. -- Available only for reading data. -- Can’t be dropped or altered, but can be detached. +- Located in the `system` database. +- Available only for reading data. +- Can’t be dropped or altered, but can be detached. Most system tables store their data in RAM. A ClickHouse server creates such system tables at the start. @@ -26,12 +26,12 @@ Unlike other system tables, the system log tables [metric_log](../../operations/ System log tables can be customized by creating a config file with the same name as the table under `/etc/clickhouse-server/config.d/`, or setting corresponding elements in `/etc/clickhouse-server/config.xml`. The elements that can be customized are: -- `database`: database the system log table belongs to. This option is deprecated now. All system log tables are under database `system`. -- `table`: table to insert data.
-- `partition_by`: specify [PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) expression. -- `ttl`: specify table [TTL](../../sql-reference/statements/alter/ttl.md) expression. -- `flush_interval_milliseconds`: interval of flushing data to disk. -- `engine`: provide full engine expression (starting with `ENGINE =` ) with parameters. This option is contradict with `partition_by` and `ttl`. If set together, the server would raise an exception and exit. +- `database`: database the system log table belongs to. This option is deprecated now. All system log tables are under database `system`. +- `table`: table to insert data. +- `partition_by`: specify [PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) expression. +- `ttl`: specify table [TTL](../../sql-reference/statements/alter/ttl.md) expression. +- `flush_interval_milliseconds`: interval of flushing data to disk. +- `engine`: provide full engine expression (starting with `ENGINE =`) with parameters. This option conflicts with `partition_by` and `ttl`. If set together, the server will raise an exception and exit. An example: @@ -56,8 +56,8 @@ By default, table growth is unlimited. To control a size of a table, you can use For collecting system metrics ClickHouse server uses: -- `CAP_NET_ADMIN` capability. -- [procfs](https://en.wikipedia.org/wiki/Procfs) (only in Linux). +- `CAP_NET_ADMIN` capability. +- [procfs](https://en.wikipedia.org/wiki/Procfs) (only in Linux). **procfs** @@ -65,13 +65,13 @@ If ClickHouse server does not have `CAP_NET_ADMIN` capability, it tries to fall If procfs is supported and enabled on the system, ClickHouse server collects these metrics: -- `OSCPUVirtualTimeMicroseconds` -- `OSCPUWaitMicroseconds` -- `OSIOWaitMicroseconds` -- `OSReadChars` -- `OSWriteChars` -- `OSReadBytes` -- `OSWriteBytes` +- `OSCPUVirtualTimeMicroseconds` +- `OSCPUWaitMicroseconds` +- `OSIOWaitMicroseconds` +- `OSReadChars` +- `OSWriteChars` +- `OSReadBytes` +- `OSWriteBytes` ## Related content diff --git a/docs/en/operations/system-tables/information_schema.md b/docs/en/operations/system-tables/information_schema.md index a8e516f02a3..07e9a9e2f58 100644 --- a/docs/en/operations/system-tables/information_schema.md +++ b/docs/en/operations/system-tables/information_schema.md @@ -22,10 +22,10 @@ SHOW TABLES FROM INFORMATION_SCHEMA; `INFORMATION_SCHEMA` contains the following views: -- [COLUMNS](#columns) -- [SCHEMATA](#schemata) -- [TABLES](#tables) -- [VIEWS](#views) +- [COLUMNS](#columns) +- [SCHEMATA](#schemata) +- [TABLES](#tables) +- [VIEWS](#views) ## COLUMNS {#columns} @@ -33,29 +33,29 @@ Contains columns read from the [system.columns](../../operations/system-tables/c Columns: -- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. -- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. -- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `column_name` ([String](../../sql-reference/data-types/string.md)) — Column name. -- `ordinal_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1. -- `column_default` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined.
-- `is_nullable` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column type is `Nullable`. -- `data_type` ([String](../../sql-reference/data-types/string.md)) — Column type. -- `character_maximum_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned. -- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse makes sense only for `FixedString` data type. Otherwise, the `NULL` value is returned. -- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is bitness for integer types and decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned. -- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system is the accuracy of approximate numeric data, exact numeric data, integer data or monetary data. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned. -- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned. -- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of `DateTime64` data type. For other data types, the `NULL` value is returned. -- `character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `collation_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `collation_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `collation_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `domain_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `domain_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `domain_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. 
+- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located.
+- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located.
+- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name.
+- `column_name` ([String](../../sql-reference/data-types/string.md)) — Column name.
+- `ordinal_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1.
+- `column_default` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined.
+- `is_nullable` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column type is `Nullable`.
+- `data_type` ([String](../../sql-reference/data-types/string.md)) — Column type.
+- `character_maximum_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the `FixedString` data type. Otherwise, the `NULL` value is returned.
+- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the `FixedString` data type. Otherwise, the `NULL` value is returned.
+- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is the bitness for integer types and the decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
+- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system in which the accuracy of approximate numeric data, exact numeric data, integer data, or monetary data is expressed. In ClickHouse it's 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned.
+- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned.
+- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of the `DateTime64` data type. For other data types, the `NULL` value is returned.
+- `character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
+- `character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
+- `character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
+- `collation_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
+- `collation_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `collation_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `domain_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `domain_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `domain_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. **Example** @@ -101,13 +101,13 @@ Contains columns read from the [system.databases](../../operations/system-tables Columns: -- `catalog_name` ([String](../../sql-reference/data-types/string.md)) — The name of the database. -- `schema_name` ([String](../../sql-reference/data-types/string.md)) — The name of the database. -- `schema_owner` ([String](../../sql-reference/data-types/string.md)) — Schema owner name, always `'default'`. -- `default_character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `default_character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `default_character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. -- `sql_path` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `catalog_name` ([String](../../sql-reference/data-types/string.md)) — The name of the database. +- `schema_name` ([String](../../sql-reference/data-types/string.md)) — The name of the database. +- `schema_owner` ([String](../../sql-reference/data-types/string.md)) — Schema owner name, always `'default'`. +- `default_character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `default_character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `default_character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `sql_path` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. **Example** @@ -137,15 +137,15 @@ Contains columns read from the [system.tables](../../operations/system-tables/ta Columns: -- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. -- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. -- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `table_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Table type. 
Possible values: - - `BASE TABLE` - - `VIEW` - - `FOREIGN TABLE` - - `LOCAL TEMPORARY` - - `SYSTEM VIEW` +- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `table_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Table type. Possible values: + - `BASE TABLE` + - `VIEW` + - `FOREIGN TABLE` + - `LOCAL TEMPORARY` + - `SYSTEM VIEW` **Example** @@ -172,18 +172,18 @@ Contains columns read from the [system.tables](../../operations/system-tables/ta Columns: -- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. -- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. -- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `view_definition` ([String](../../sql-reference/data-types/string.md)) — `SELECT` query for view. -- `check_option` ([String](../../sql-reference/data-types/string.md)) — `NONE`, no checking. -- `is_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the view is not updated. -- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — Shows whether the created view is [materialized](../../sql-reference/statements/create/view.md/#materialized-view). Possible values: - - `NO` — The created view is not materialized. - - `YES` — The created view is materialized. -- `is_trigger_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not updated. -- `is_trigger_deletable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not deleted. -- `is_trigger_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, no data is inserted into the trigger. +- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. +- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `view_definition` ([String](../../sql-reference/data-types/string.md)) — `SELECT` query for view. +- `check_option` ([String](../../sql-reference/data-types/string.md)) — `NONE`, no checking. +- `is_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the view is not updated. +- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — Shows whether the created view is [materialized](../../sql-reference/statements/create/view.md/#materialized-view). Possible values: + - `NO` — The created view is not materialized. + - `YES` — The created view is materialized. +- `is_trigger_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not updated. +- `is_trigger_deletable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not deleted. +- `is_trigger_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, no data is inserted into the trigger. 
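+
+Since the trigger-related columns are fixed placeholders, the most useful flag here is `is_insertable_into`; a minimal sketch that lists only the materialized views (assuming the default `INFORMATION_SCHEMA` views shown above):
+
+```sql
+-- materialized views are the only views with is_insertable_into = 'YES'
+SELECT table_schema, table_name
+FROM INFORMATION_SCHEMA.VIEWS
+WHERE is_insertable_into = 'YES';
+```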
**Example** diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md index 672c79e335b..d8539908bf7 100644 --- a/docs/en/operations/system-tables/merge_tree_settings.md +++ b/docs/en/operations/system-tables/merge_tree_settings.md @@ -7,11 +7,11 @@ Contains information about settings for `MergeTree` tables. Columns: -- `name` (String) — Setting name. -- `value` (String) — Setting value. -- `description` (String) — Setting description. -- `type` (String) — Setting type (implementation specific string value). -- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed. +- `name` (String) — Setting name. +- `value` (String) — Setting value. +- `description` (String) — Setting description. +- `type` (String) — Setting type (implementation specific string value). +- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed. **Example** ```sql diff --git a/docs/en/operations/system-tables/merges.md b/docs/en/operations/system-tables/merges.md index a5055639393..ec1d4c8e656 100644 --- a/docs/en/operations/system-tables/merges.md +++ b/docs/en/operations/system-tables/merges.md @@ -7,20 +7,20 @@ Contains information about merges and part mutations currently in process for ta Columns: -- `database` (String) — The name of the database the table is in. -- `table` (String) — Table name. -- `elapsed` (Float64) — The time elapsed (in seconds) since the merge started. -- `progress` (Float64) — The percentage of completed work from 0 to 1. -- `num_parts` (UInt64) — The number of pieces to be merged. -- `result_part_name` (String) — The name of the part that will be formed as the result of merging. -- `is_mutation` (UInt8) — 1 if this process is a part mutation. -- `total_size_bytes_compressed` (UInt64) — The total size of the compressed data in the merged chunks. -- `total_size_marks` (UInt64) — The total number of marks in the merged parts. -- `bytes_read_uncompressed` (UInt64) — Number of bytes read, uncompressed. -- `rows_read` (UInt64) — Number of rows read. -- `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed. -- `rows_written` (UInt64) — Number of rows written. -- `memory_usage` (UInt64) — Memory consumption of the merge process. -- `thread_id` (UInt64) — Thread ID of the merge process. -- `merge_type` — The type of current merge. Empty if it's an mutation. -- `merge_algorithm` — The algorithm used in current merge. Empty if it's an mutation. +- `database` (String) — The name of the database the table is in. +- `table` (String) — Table name. +- `elapsed` (Float64) — The time elapsed (in seconds) since the merge started. +- `progress` (Float64) — The percentage of completed work from 0 to 1. +- `num_parts` (UInt64) — The number of pieces to be merged. +- `result_part_name` (String) — The name of the part that will be formed as the result of merging. +- `is_mutation` (UInt8) — 1 if this process is a part mutation. +- `total_size_bytes_compressed` (UInt64) — The total size of the compressed data in the merged chunks. +- `total_size_marks` (UInt64) — The total number of marks in the merged parts. +- `bytes_read_uncompressed` (UInt64) — Number of bytes read, uncompressed. +- `rows_read` (UInt64) — Number of rows read. +- `bytes_written_uncompressed` (UInt64) — Number of bytes written, uncompressed. +- `rows_written` (UInt64) — Number of rows written. +- `memory_usage` (UInt64) — Memory consumption of the merge process. 
+- `thread_id` (UInt64) — Thread ID of the merge process.
+- `merge_type` — The type of current merge. Empty if it's a mutation.
+- `merge_algorithm` — The algorithm used in current merge. Empty if it's a mutation.
diff --git a/docs/en/operations/system-tables/metric_log.md b/docs/en/operations/system-tables/metric_log.md
index b22e672a31b..9ea0dde3f80 100644
--- a/docs/en/operations/system-tables/metric_log.md
+++ b/docs/en/operations/system-tables/metric_log.md
@@ -6,9 +6,9 @@ Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk.

Columns:
-- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
-- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
-- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution.
+- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
+- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
+- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution.

**Example**

@@ -45,8 +45,8 @@ CurrentMetric_DistributedFilesToInsert: 0

**See also**

-- [metric_log setting](../../operations/server-configuration-parameters/settings.md#metric_log) — Enabling and disabling the setting.
-- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md) — Contains periodically calculated metrics.
-- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
-- [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics.
-- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
+- [metric_log setting](../../operations/server-configuration-parameters/settings.md#metric_log) — Enabling and disabling the setting.
+- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md) — Contains periodically calculated metrics.
+- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
+- [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics.
+- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md
index 141fc3c82c2..5a7dfd03eb4 100644
--- a/docs/en/operations/system-tables/metrics.md
+++ b/docs/en/operations/system-tables/metrics.md
@@ -7,9 +7,9 @@ Contains metrics which can be calculated instantly, or have a current value. For

Columns:

-- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
-- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value.
-- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description.
+- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
+- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value.
+- `description` ([String](../../sql-reference/data-types/string.md)) — Metric description.
The list of supported metrics you can find in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse.

@@ -34,9 +34,723 @@ SELECT * FROM system.metrics LIMIT 10
 └──────────────────────────────────────┴───────┴────────────────────────────────────────────────────────────────────────┘
 ```

+## Metric descriptions
+
+### AggregatorThreads
+
+Number of threads in the Aggregator thread pool.
+
+### AggregatorThreadsActive
+
+Number of threads in the Aggregator thread pool running a task.
+
+### AsyncInsertCacheSize
+
+Number of async insert hash ids in the cache
+
+### AsynchronousInsertThreads
+
+Number of threads in the AsynchronousInsert thread pool.
+
+### AsynchronousInsertThreadsActive
+
+Number of threads in the AsynchronousInsert thread pool running a task.
+
+### AsynchronousReadWait
+
+Number of threads waiting for asynchronous read.
+
+### BackgroundBufferFlushSchedulePoolSize
+
+Limit on number of tasks in BackgroundBufferFlushSchedulePool
+
+### BackgroundBufferFlushSchedulePoolTask
+
+Number of active tasks in BackgroundBufferFlushSchedulePool. This pool is used for periodic Buffer flushes
+
+### BackgroundCommonPoolSize
+
+Limit on number of tasks in an associated background pool
+
+### BackgroundCommonPoolTask
+
+Number of active tasks in an associated background pool
+
+### BackgroundDistributedSchedulePoolSize
+
+Limit on number of tasks in BackgroundDistributedSchedulePool
+
+### BackgroundDistributedSchedulePoolTask
+
+Number of active tasks in BackgroundDistributedSchedulePool. This pool is used for distributed sends that are done in the background.
+
+### BackgroundFetchesPoolSize
+
+Limit on number of simultaneous fetches in an associated background pool
+
+### BackgroundFetchesPoolTask
+
+Number of active fetches in an associated background pool
+
+### BackgroundMergesAndMutationsPoolSize
+
+Limit on number of active merges and mutations in an associated background pool
+
+### BackgroundMergesAndMutationsPoolTask
+
+Number of active merges and mutations in an associated background pool
+
+### BackgroundMessageBrokerSchedulePoolSize
+
+Limit on number of tasks in BackgroundProcessingPool for message streaming
+
+### BackgroundMessageBrokerSchedulePoolTask
+
+Number of active tasks in BackgroundProcessingPool for message streaming
+
+### BackgroundMovePoolSize
+
+Limit on number of tasks in BackgroundProcessingPool for moves
+
+### BackgroundMovePoolTask
+
+Number of active tasks in BackgroundProcessingPool for moves
+
+### BackgroundSchedulePoolSize
+
+Limit on number of tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc.
+
+### BackgroundSchedulePoolTask
+
+Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc.
+
+### BackupsIOThreads
+
+Number of threads in the BackupsIO thread pool.
+
+### BackupsIOThreadsActive
+
+Number of threads in the BackupsIO thread pool running a task.
+
+### BackupsThreads
+
+Number of threads in the thread pool for BACKUP.
+
+### BackupsThreadsActive
+
+Number of threads in thread pool for BACKUP running a task.
+
+### BrokenDistributedFilesToInsert
+
+Number of files for asynchronous insertion into Distributed tables that have been marked as broken. This metric starts from 0 on server start.
Number of files for every shard is summed.
+
+### CacheDetachedFileSegments
+
+Number of existing detached cache file segments
+
+### CacheDictionaryThreads
+
+Number of threads in the CacheDictionary thread pool.
+
+### CacheDictionaryThreadsActive
+
+Number of threads in the CacheDictionary thread pool running a task.
+
+### CacheDictionaryUpdateQueueBatches
+
+Number of 'batches' (a set of keys) in update queue in CacheDictionaries.
+
+### CacheDictionaryUpdateQueueKeys
+
+Exact number of keys in update queue in CacheDictionaries.
+
+### CacheFileSegments
+
+Number of existing cache file segments
+
+### ContextLockWait
+
+Number of threads waiting for a lock in Context. This is a global lock.
+
+### DDLWorkerThreads
+
+Number of threads in the DDLWorker thread pool for ON CLUSTER queries.
+
+### DDLWorkerThreadsActive
+
+Number of threads in the DDLWorker thread pool for ON CLUSTER queries running a task.
+
+### DatabaseCatalogThreads
+
+Number of threads in the DatabaseCatalog thread pool.
+
+### DatabaseCatalogThreadsActive
+
+Number of threads in the DatabaseCatalog thread pool running a task.
+
+### DatabaseOnDiskThreads
+
+Number of threads in the DatabaseOnDisk thread pool.
+
+### DatabaseOnDiskThreadsActive
+
+Number of threads in the DatabaseOnDisk thread pool running a task.
+
+### DatabaseOrdinaryThreads
+
+Number of threads in the Ordinary database thread pool.
+
+### DatabaseOrdinaryThreadsActive
+
+Number of threads in the Ordinary database thread pool running a task.
+
+### DelayedInserts
+
+Number of INSERT queries that are throttled due to a high number of active data parts for a partition in a MergeTree table.
+
+### DestroyAggregatesThreads
+
+Number of threads in the thread pool for destroying aggregate states.
+
+### DestroyAggregatesThreadsActive
+
+Number of threads in the thread pool for destroying aggregate states running a task.
+
+### DictCacheRequests
+
+Number of requests in flight to data sources of dictionaries of cache type.
+
+### DiskObjectStorageAsyncThreads
+
+Number of threads in the async thread pool for DiskObjectStorage.
+
+### DiskObjectStorageAsyncThreadsActive
+
+Number of threads in the async thread pool for DiskObjectStorage running a task.
+
+### DiskSpaceReservedForMerge
+
+Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts.
+
+### DistributedFilesToInsert
+
+Number of pending files to process for asynchronous insertion into Distributed tables. Number of files for every shard is summed.
+
+### DistributedSend
+
+Number of connections to remote servers sending data that was INSERTed into Distributed tables. Both synchronous and asynchronous modes.
+
+### EphemeralNode
+
+Number of ephemeral nodes held in ZooKeeper.
+
+### FilesystemCacheElements
+
+Filesystem cache elements (file segments)
+
+### FilesystemCacheReadBuffers
+
+Number of active cache buffers
+
+### FilesystemCacheSize
+
+Filesystem cache size in bytes
+
+### GlobalThread
+
+Number of threads in global thread pool.
+
+### GlobalThreadActive
+
+Number of threads in global thread pool running a task.
+
+### HTTPConnection
+
+Number of connections to HTTP server
+
+### HashedDictionaryThreads
+
+Number of threads in the HashedDictionary thread pool.
+
+### HashedDictionaryThreadsActive
+
+Number of threads in the HashedDictionary thread pool running a task.
+
+### IOPrefetchThreads
+
+Number of threads in the IO prefetch thread pool.
+
+### IOPrefetchThreadsActive
+
+Number of threads in the IO prefetch thread pool running a task.
+
+### IOThreads
+
+Number of threads in the IO thread pool.
+
+### IOThreadsActive
+
+Number of threads in the IO thread pool running a task.
+
+### IOUringInFlightEvents
+
+Number of io_uring SQEs in flight
+
+### IOUringPendingEvents
+
+Number of io_uring SQEs waiting to be submitted
+
+### IOWriterThreads
+
+Number of threads in the IO writer thread pool.
+
+### IOWriterThreadsActive
+
+Number of threads in the IO writer thread pool running a task.
+
+### InterserverConnection
+
+Number of connections from other replicas to fetch parts
+
+### KafkaAssignedPartitions
+
+Number of partitions Kafka tables currently assigned to
+
+### KafkaBackgroundReads
+
+Number of background reads currently working (populating materialized views from Kafka)
+
+### KafkaConsumers
+
+Number of active Kafka consumers
+
+### KafkaConsumersInUse
+
+Number of consumers which are currently used by direct or background reads
+
+### KafkaConsumersWithAssignment
+
+Number of active Kafka consumers which have some partitions assigned.
+
+### KafkaLibrdkafkaThreads
+
+Number of active librdkafka threads
+
+### KafkaProducers
+
+Number of active Kafka producers created
+
+### KafkaWrites
+
+Number of currently running inserts to Kafka
+
+### KeeperAliveConnections
+
+Number of alive connections
+
+### KeeperOutstandingRequets
+
+Number of outstanding requests
+
+### LocalThread
+
+Number of threads in local thread pools. The threads in local thread pools are taken from the global thread pool.
+
+### LocalThreadActive
+
+Number of threads in local thread pools running a task.
+
+### MMappedAllocBytes
+
+Sum bytes of mmapped allocations
+
+### MMappedAllocs
+
+Total number of mmapped allocations
+
+### MMappedFileBytes
+
+Sum size of mmapped file regions.
+
+### MMappedFiles
+
+Total number of mmapped files.
+
+### MarksLoaderThreads
+
+Number of threads in thread pool for loading marks.
+
+### MarksLoaderThreadsActive
+
+Number of threads in the thread pool for loading marks running a task.
+
+### MaxDDLEntryID
+
+Max processed DDL entry of DDLWorker.
+
+### MaxPushedDDLEntryID
+
+Max DDL entry of DDLWorker that was pushed to ZooKeeper.
+
+### MemoryTracking
+
+Total amount of memory (bytes) allocated by the server.
+
+### Merge
+
+Number of executing background merges
+
+### MergeTreeAllRangesAnnouncementsSent
+
+The current number of announcements being sent in flight from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.
+
+### MergeTreeBackgroundExecutorThreads
+
+Number of threads in the MergeTreeBackgroundExecutor thread pool.
+
+### MergeTreeBackgroundExecutorThreadsActive
+
+Number of threads in the MergeTreeBackgroundExecutor thread pool running a task.
+
+### MergeTreeDataSelectExecutorThreads
+
+Number of threads in the MergeTreeDataSelectExecutor thread pool.
+
+### MergeTreeDataSelectExecutorThreadsActive
+
+Number of threads in the MergeTreeDataSelectExecutor thread pool running a task.
+
+### MergeTreePartsCleanerThreads
+
+Number of threads in the MergeTree parts cleaner thread pool.
+
+### MergeTreePartsCleanerThreadsActive
+
+Number of threads in the MergeTree parts cleaner thread pool running a task.
+
+### MergeTreePartsLoaderThreads
+
+Number of threads in the MergeTree parts loader thread pool.
+
+### MergeTreePartsLoaderThreadsActive
+
+Number of threads in the MergeTree parts loader thread pool running a task.
+
+### MergeTreeReadTaskRequestsSent
+
+The current number of callback requests in flight from the remote server back to the initiator server to choose the read task (for MergeTree tables). Measured on the remote server side.
+
+### Move
+
+Number of currently executing moves
+
+### MySQLConnection
+
+Number of client connections using MySQL protocol
+
+### NetworkReceive
+
+Number of threads receiving data from network. Only ClickHouse-related network interaction is included, not that of 3rd-party libraries.
+
+### NetworkSend
+
+Number of threads sending data to network. Only ClickHouse-related network interaction is included, not that of 3rd-party libraries.
+
+### OpenFileForRead
+
+Number of files open for reading
+
+### OpenFileForWrite
+
+Number of files open for writing
+
+### ParallelFormattingOutputFormatThreads
+
+Number of threads in the ParallelFormattingOutputFormatThreads thread pool.
+
+### ParallelFormattingOutputFormatThreadsActive
+
+Number of threads in the ParallelFormattingOutputFormatThreads thread pool running a task.
+
+### ParallelParsingInputFormatThreads
+
+Number of threads in the ParallelParsingInputFormat thread pool.
+
+### ParallelParsingInputFormatThreadsActive
+
+Number of threads in the ParallelParsingInputFormat thread pool running a task.
+
+### PartMutation
+
+Number of mutations (ALTER DELETE/UPDATE)
+
+### PartsActive
+
+Active data part, used by current and upcoming SELECTs.
+
+### PartsCommitted
+
+Deprecated. See PartsActive.
+
+### PartsCompact
+
+Compact parts.
+
+### PartsDeleteOnDestroy
+
+Part was moved to another disk and should be deleted in its own destructor.
+
+### PartsDeleting
+
+Not active data part with an identity refcounter; it is currently being deleted by a cleaner.
+
+### PartsInMemory
+
+In-memory parts.
+
+### PartsOutdated
+
+Not active data part that could be used only by current SELECTs; it can be deleted after the SELECTs finish.
+
+### PartsPreActive
+
+The part is in data_parts, but not used for SELECTs.
+
+### PartsPreCommitted
+
+Deprecated. See PartsPreActive.
+
+### PartsTemporary
+
+The part is being generated now; it is not in the data_parts list.
+
+### PartsWide
+
+Wide parts.
+
+### PendingAsyncInsert
+
+Number of asynchronous inserts that are waiting for flush.
+
+### PostgreSQLConnection
+
+Number of client connections using PostgreSQL protocol
+
+### Query
+
+Number of executing queries
+
+### QueryPreempted
+
+Number of queries that are stopped and waiting due to 'priority' setting.
+
+### QueryThread
+
+Number of query processing threads
+
+### RWLockActiveReaders
+
+Number of threads holding read lock in a table RWLock.
+
+### RWLockActiveWriters
+
+Number of threads holding write lock in a table RWLock.
+
+### RWLockWaitingReaders
+
+Number of threads waiting for read on a table RWLock.
+
+### RWLockWaitingWriters
+
+Number of threads waiting for write on a table RWLock.
+
+### Read
+
+Number of read (read, pread, io_getevents, etc.) syscalls in flight
+
+### ReadTaskRequestsSent
+
+The current number of callback requests in flight from the remote server back to the initiator server to choose the read task (for s3Cluster table function and similar). Measured on the remote server side.
+
+### ReadonlyReplica
+
+Number of Replicated tables that are currently in readonly state due to re-initialization after ZooKeeper session loss or due to startup without ZooKeeper configured.
+
+### RemoteRead
+
+Number of reads with a remote reader in flight
+
+### ReplicatedChecks
+
+Number of data parts being checked for consistency
+
+### ReplicatedFetch
+
+Number of data parts being fetched from a replica
+
+### ReplicatedSend
+
+Number of data parts being sent to replicas
+
+### RestartReplicaThreads
+
+Number of threads in the RESTART REPLICA thread pool.
+
+### RestartReplicaThreadsActive
+
+Number of threads in the RESTART REPLICA thread pool running a task.
+
+### RestoreThreads
+
+Number of threads in the thread pool for RESTORE.
+
+### RestoreThreadsActive
+
+Number of threads in the thread pool for RESTORE running a task.
+
+### Revision
+
+Revision of the server. It is a number incremented for every release or release candidate except patch releases.
+
+### S3Requests
+
+S3 requests
+
+### SendExternalTables
+
+Number of connections that are sending data for external tables to remote servers. External tables are used to implement GLOBAL IN and GLOBAL JOIN operators with distributed subqueries.
+
+### SendScalars
+
+Number of connections that are sending data for scalars to remote servers.
+
+### StartupSystemTablesThreads
+
+Number of threads in the StartupSystemTables thread pool.
+
+### StartupSystemTablesThreadsActive
+
+Number of threads in the StartupSystemTables thread pool running a task.
+
+### StorageBufferBytes
+
+Number of bytes in buffers of Buffer tables
+
+### StorageBufferRows
+
+Number of rows in buffers of Buffer tables
+
+### StorageDistributedThreads
+
+Number of threads in the StorageDistributed thread pool.
+
+### StorageDistributedThreadsActive
+
+Number of threads in the StorageDistributed thread pool running a task.
+
+### StorageHiveThreads
+
+Number of threads in the StorageHive thread pool.
+
+### StorageHiveThreadsActive
+
+Number of threads in the StorageHive thread pool running a task.
+
+### StorageS3Threads
+
+Number of threads in the StorageS3 thread pool.
+
+### StorageS3ThreadsActive
+
+Number of threads in the StorageS3 thread pool running a task.
+
+### SystemReplicasThreads
+
+Number of threads in the system.replicas thread pool.
+
+### SystemReplicasThreadsActive
+
+Number of threads in the system.replicas thread pool running a task.
+
+### TCPConnection
+
+Number of connections to the TCP server (clients with the native interface); server-to-server distributed query connections are also included
+
+### TablesLoaderThreads
+
+Number of threads in the tables loader thread pool.
+
+### TablesLoaderThreadsActive
+
+Number of threads in the tables loader thread pool running a task.
+
+### TablesToDropQueueSize
+
+Number of dropped tables that are waiting for background data removal.
+
+### TemporaryFilesForAggregation
+
+Number of temporary files created for external aggregation
+
+### TemporaryFilesForJoin
+
+Number of temporary files created for JOIN
+
+### TemporaryFilesForSort
+
+Number of temporary files created for external sorting
+
+### TemporaryFilesUnknown
+
+Number of temporary files created without known purpose
+
+### ThreadPoolFSReaderThreads
+
+Number of threads in the thread pool for local_filesystem_read_method=threadpool.
+
+### ThreadPoolFSReaderThreadsActive
+
+Number of threads in the thread pool for local_filesystem_read_method=threadpool running a task.
+
+### ThreadPoolRemoteFSReaderThreads
+
+Number of threads in the thread pool for remote_filesystem_read_method=threadpool.
+
+### ThreadPoolRemoteFSReaderThreadsActive
+
+Number of threads in the thread pool for remote_filesystem_read_method=threadpool running a task.
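+
+The `*Threads` / `*ThreadsActive` pairs above make it easy to spot saturated pools. A minimal sketch that lists the pools currently running tasks (the `LIKE` pattern is just an illustration):
+
+```sql
+-- show only thread pools with at least one busy thread
+SELECT metric, value
+FROM system.metrics
+WHERE metric LIKE '%ThreadsActive' AND value > 0
+ORDER BY value DESC;
+```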
+
+### ThreadsInOvercommitTracker
+
+Number of waiting threads inside OvercommitTracker
+
+### TotalTemporaryFiles
+
+Number of temporary files created
+
+### VersionInteger
+
+Version of the server in a single integer number in base-1000. For example, version 11.22.33 is translated to 11022033.
+
+### Write
+
+Number of write (write, pwrite, io_getevents, etc.) syscalls in flight
+
+### ZooKeeperRequest
+
+Number of requests to ZooKeeper in flight.
+
+### ZooKeeperSession
+
+Number of sessions (connections) to ZooKeeper. Should be no more than one, because using more than one connection to ZooKeeper may lead to bugs due to the lack of linearizability (stale reads) that the ZooKeeper consistency model allows.
+
+### ZooKeeperWatch
+
+Number of watches (event subscriptions) in ZooKeeper.
+
 **See Also**

-- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
-- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
-- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
-- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
+- [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) — Contains periodically calculated metrics.
+- [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that occurred.
+- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`.
+- [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring.
diff --git a/docs/en/operations/system-tables/moves.md b/docs/en/operations/system-tables/moves.md
index 54f07540507..cd543ce7536 100644
--- a/docs/en/operations/system-tables/moves.md
+++ b/docs/en/operations/system-tables/moves.md
@@ -7,21 +7,21 @@ The table contains information about in-progress [data part moves](/docs/en/sql-

 Columns:

-- `database` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the database.
+- `database` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the database.

-- `table` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the table containing moving data part.
+- `table` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the table containing the moving data part.

-- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — Time elapsed (in seconds) since data part movement started.
+- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — Time elapsed (in seconds) since data part movement started.

-- `target_disk_name` ([String](disks.md)) — Name of [disk](/docs/en/operations/system-tables/disks/) to which the data part is moving.
+- `target_disk_name` ([String](disks.md)) — Name of [disk](/docs/en/operations/system-tables/disks/) to which the data part is moving.

-- `target_disk_path` ([String](disks.md)) — Path to the mount point of the [disk](/docs/en/operations/system-tables/disks/) in the file system.
+- `target_disk_path` ([String](disks.md)) — Path to the mount point of the [disk](/docs/en/operations/system-tables/disks/) in the file system.
-- `part_name` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the data part being moved. +- `part_name` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the data part being moved. -- `part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Data part size. +- `part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Data part size. -- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Identifier of a thread performing the movement. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Identifier of a thread performing the movement. **Example** @@ -37,6 +37,6 @@ SELECT * FROM system.moves **See Also** -- [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine -- [Using Multiple Block Devices for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree#table_engine-mergetree-multiple-volumes) -- [ALTER TABLE ... MOVE PART](/docs/en/sql-reference/statements/alter/partition#move-partitionpart) command +- [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine +- [Using Multiple Block Devices for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree#table_engine-mergetree-multiple-volumes) +- [ALTER TABLE ... MOVE PART](/docs/en/sql-reference/statements/alter/partition#move-partitionpart) command diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md index d65bccb9aaa..64e86992af9 100644 --- a/docs/en/operations/system-tables/mutations.md +++ b/docs/en/operations/system-tables/mutations.md @@ -5,31 +5,31 @@ slug: /en/operations/system-tables/mutations The table contains information about [mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) of [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) tables and their progress. Each mutation command is represented by a single row. -Columns: +## Columns: -- `database` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the database to which the mutation was applied. +- `database` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the database to which the mutation was applied. -- `table` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the table to which the mutation was applied. +- `table` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the table to which the mutation was applied. -- `mutation_id` ([String](/docs/en/sql-reference/data-types/string.md)) — The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ClickHouse Keeper. For non-replicated tables the IDs correspond to file names in the data directory of the table. +- `mutation_id` ([String](/docs/en/sql-reference/data-types/string.md)) — The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ClickHouse Keeper. For non-replicated tables the IDs correspond to file names in the data directory of the table. -- `command` ([String](/docs/en/sql-reference/data-types/string.md)) — The mutation command string (the part of the query after `ALTER TABLE [db.]table`). +- `command` ([String](/docs/en/sql-reference/data-types/string.md)) — The mutation command string (the part of the query after `ALTER TABLE [db.]table`). 
-- `create_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — Date and time when the mutation command was submitted for execution.
+- `create_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — Date and time when the mutation command was submitted for execution.

-- `block_numbers.partition_id` ([Array](/docs/en/sql-reference/data-types/array.md)([String](/docs/en/sql-reference/data-types/string.md))) — For mutations of replicated tables, the array contains the partitions' IDs (one record for each partition). For mutations of non-replicated tables the array is empty.
+- `block_numbers.partition_id` ([Array](/docs/en/sql-reference/data-types/array.md)([String](/docs/en/sql-reference/data-types/string.md))) — For mutations of replicated tables, the array contains the partitions' IDs (one record for each partition). For mutations of non-replicated tables the array is empty.

-- `block_numbers.number` ([Array](/docs/en/sql-reference/data-types/array.md)([Int64](/docs/en/sql-reference/data-types/int-uint.md))) — For mutations of replicated tables, the array contains one record for each partition, with the block number that was acquired by the mutation. Only parts that contain blocks with numbers less than this number will be mutated in the partition.
+- `block_numbers.number` ([Array](/docs/en/sql-reference/data-types/array.md)([Int64](/docs/en/sql-reference/data-types/int-uint.md))) — For mutations of replicated tables, the array contains one record for each partition, with the block number that was acquired by the mutation. Only parts that contain blocks with numbers less than this number will be mutated in the partition.

 In non-replicated tables, block numbers in all partitions form a single sequence. This means that for mutations of non-replicated tables, the column will contain one record with a single block number acquired by the mutation.

-- `parts_to_do_names` ([Array](/docs/en/sql-reference/data-types/array.md)([String](/docs/en/sql-reference/data-types/string.md))) — An array of names of data parts that need to be mutated for the mutation to complete.
+- `parts_to_do_names` ([Array](/docs/en/sql-reference/data-types/array.md)([String](/docs/en/sql-reference/data-types/string.md))) — An array of names of data parts that need to be mutated for the mutation to complete.

-- `parts_to_do` ([Int64](/docs/en/sql-reference/data-types/int-uint.md)) — The number of data parts that need to be mutated for the mutation to complete.
+- `parts_to_do` ([Int64](/docs/en/sql-reference/data-types/int-uint.md)) — The number of data parts that need to be mutated for the mutation to complete.

-- `is_done` ([UInt8](/docs/en/sql-reference/data-types/int-uint.md)) — The flag whether the mutation is done or not. Possible values:
- - `1` if the mutation is completed,
- - `0` if the mutation is still in process.
+- `is_done` ([UInt8](/docs/en/sql-reference/data-types/int-uint.md)) — Flag that indicates whether the mutation is done. Possible values:
+ - `1` if the mutation is completed,
+ - `0` if the mutation is still in process.

:::note
Even if `parts_to_do = 0` it is possible that a mutation of a replicated table is not completed yet because of a long-running `INSERT` query, that will create a new data part needed to be mutated.
@@ -37,14 +37,27 @@ Even if `parts_to_do = 0` it is possible that a mutation of a replicated table i

 If there were problems with mutating some data parts, the following columns contain additional information:

-- `latest_failed_part` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the most recent part that could not be mutated.
+- `latest_failed_part` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the most recent part that could not be mutated.

-- `latest_fail_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — The date and time of the most recent part mutation failure.
+- `latest_fail_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — The date and time of the most recent part mutation failure.

-- `latest_fail_reason` ([String](/docs/en/sql-reference/data-types/string.md)) — The exception message that caused the most recent part mutation failure.
+- `latest_fail_reason` ([String](/docs/en/sql-reference/data-types/string.md)) — The exception message that caused the most recent part mutation failure.
+
+## Monitoring Mutations
+
+To track the progress of mutations, query the `system.mutations` table with something like the following (this requires read permissions on the `system.*` tables):
+
+``` sql
+SELECT * FROM clusterAllReplicas('cluster_name', 'db', system.mutations)
+WHERE is_done=0 AND table='tmp';
+```
+
+:::tip
+Replace `tmp` in `table='tmp'` with the name of the table whose mutations you are checking.
+:::

 **See Also**

-- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations)
-- [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine
-- [ReplicatedMergeTree](/docs/en/engines/table-engines/mergetree-family/replication.md) family
+- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations)
+- [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine
+- [ReplicatedMergeTree](/docs/en/engines/table-engines/mergetree-family/replication.md) family
diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md
index 7d7d1ef1b04..a605a46c14c 100644
--- a/docs/en/operations/system-tables/opentelemetry_span_log.md
+++ b/docs/en/operations/system-tables/opentelemetry_span_log.md
@@ -7,30 +7,30 @@ Contains information about [trace spans](https://opentracing.io/docs/overview/sp

 Columns:

-- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — ID of the trace for executed query.
+- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — ID of the trace for executed query.

-- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`.
+- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`.

-- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`.
+- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`.

-- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation.
+- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation.

-- `kind` ([Enum8](../../sql-reference/data-types/enum.md)) — The [SpanKind](https://opentelemetry.io/docs/reference/specification/trace/api/#spankind) of the span.
- - `INTERNAL` — Indicates that the span represents an internal operation within an application.
- - `SERVER` — Indicates that the span covers server-side handling of a synchronous RPC or other remote request. - - `CLIENT` — Indicates that the span describes a request to some remote service. - - `PRODUCER` — Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts. - - `CONSUMER` - Indicates that the span describes a child of an asynchronous PRODUCER request. +- `kind` ([Enum8](../../sql-reference/data-types/enum.md)) — The [SpanKind](https://opentelemetry.io/docs/reference/specification/trace/api/#spankind) of the span. + - `INTERNAL` — Indicates that the span represents an internal operation within an application. + - `SERVER` — Indicates that the span covers server-side handling of a synchronous RPC or other remote request. + - `CLIENT` — Indicates that the span describes a request to some remote service. + - `PRODUCER` — Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts. + - `CONSUMER` - Indicates that the span describes a child of an asynchronous PRODUCER request. -- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds). +- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds). -- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds). +- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds). -- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`. +- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`. -- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard. +- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard. -- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard. +- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard. 
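+
+Because every span row carries its `trace_id`, you can reassemble a single trace by filtering on it and ordering by start time (a minimal sketch; the UUID below is a placeholder for a real trace ID):
+
+```sql
+-- list the spans of one trace in chronological order
+SELECT operation_name, start_time_us, finish_time_us
+FROM system.opentelemetry_span_log
+WHERE trace_id = '00000000-0000-0000-0000-000000000000'
+ORDER BY start_time_us;
+```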
**Example** @@ -59,4 +59,4 @@ attribute.values: [] **See Also** -- [OpenTelemetry](../../operations/opentelemetry.md) +- [OpenTelemetry](../../operations/opentelemetry.md) diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md index 8ccde889289..c9e34962c79 100644 --- a/docs/en/operations/system-tables/part_log.md +++ b/docs/en/operations/system-tables/part_log.md @@ -9,41 +9,41 @@ This table contains information about events that occurred with [data parts](../ The `system.part_log` table contains the following columns: -- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part. -- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values: - - `NEW_PART` — Inserting of a new data part. - - `MERGE_PARTS` — Merging of data parts. - - `DOWNLOAD_PART` — Downloading a data part. - - `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition). - - `MUTATE_PART` — Mutating of a data part. - - `MOVE_PART` — Moving the data part from the one disk to another one. -- `merge_reason` ([Enum8](../../sql-reference/data-types/enum.md)) — The reason for the event with type `MERGE_PARTS`. Can have one of the following values: - - `NOT_A_MERGE` — The current event has the type other than `MERGE_PARTS`. - - `REGULAR_MERGE` — Some regular merge. - - `TTL_DELETE_MERGE` — Cleaning up expired data. - - `TTL_RECOMPRESS_MERGE` — Recompressing data part with the. -- `merge_algorithm` ([Enum8](../../sql-reference/data-types/enum.md)) — Merge algorithm for the event with type `MERGE_PARTS`. Can have one of the following values: - - `UNDECIDED` - - `HORIZONTAL` - - `VERTICAL` -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds precision. -- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration. -- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in. -- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in. -- `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part. -- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted to. The column takes the `all` value if the partitioning is by `tuple()`. -- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files. -- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows in the data part. -- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes. -- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — An array of names of the parts which the current part was made up from (after the merge). -- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of uncompressed bytes. -- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows was read during the merge. 
-- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes was read during the merge.
-- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread.
-- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The code number of the occurred error.
-- `exception` ([String](../../sql-reference/data-types/string.md)) — Text message of the occurred error.
+- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part.
+- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values:
+ - `NEW_PART` — Inserting of a new data part.
+ - `MERGE_PARTS` — Merging of data parts.
+ - `DOWNLOAD_PART` — Downloading a data part.
+ - `REMOVE_PART` — Removing or detaching a data part using [DETACH PARTITION](../../sql-reference/statements/alter/partition.md#alter_detach-partition).
+ - `MUTATE_PART` — Mutating of a data part.
+ - `MOVE_PART` — Moving a data part from one disk to another.
+- `merge_reason` ([Enum8](../../sql-reference/data-types/enum.md)) — The reason for the event with type `MERGE_PARTS`. Can have one of the following values:
+ - `NOT_A_MERGE` — The current event has a type other than `MERGE_PARTS`.
+ - `REGULAR_MERGE` — Some regular merge.
+ - `TTL_DELETE_MERGE` — Cleaning up expired data.
+ - `TTL_RECOMPRESS_MERGE` — Recompressing a data part according to a recompression TTL rule.
+- `merge_algorithm` ([Enum8](../../sql-reference/data-types/enum.md)) — Merge algorithm for the event with type `MERGE_PARTS`. Can have one of the following values:
+ - `UNDECIDED`
+ - `HORIZONTAL`
+ - `VERTICAL`
+- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
+- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
+- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds precision.
+- `duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of the event in milliseconds.
+- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database the data part is in.
+- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table the data part is in.
+- `part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part.
+- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition that the data part was inserted into. The column takes the `all` value if the partitioning is by `tuple()`.
+- `path_on_disk` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files.
+- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows in the data part.
+- `size_in_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Size of the data part in bytes.
+- `merged_from` ([Array(String)](../../sql-reference/data-types/array.md)) — An array of names of the parts from which the current part was made up (after the merge).
+- `bytes_uncompressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Uncompressed size in bytes.
+- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows read during the merge.
+- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes read during the merge.
+- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in the context of this thread.
+- `error` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The code of the error that occurred.
+- `exception` ([String](../../sql-reference/data-types/string.md)) — Text message of the error that occurred.
The `system.part_log` table is created after the first insertion of data into the `MergeTree` table.
diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md
index e7700562e35..e61c6ed2ba4 100644
--- a/docs/en/operations/system-tables/parts.md
+++ b/docs/en/operations/system-tables/parts.md
@@ -9,107 +9,107 @@ Each row describes one data part.
Columns:
-- `partition` ([String](../../sql-reference/data-types/string.md)) – The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query.
+- `partition` ([String](../../sql-reference/data-types/string.md)) – The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query.
Formats:
- - `YYYYMM` for automatic partitioning by month.
- - `any_string` when partitioning manually.
+ - `YYYYMM` for automatic partitioning by month.
+ - `any_string` when partitioning manually.
-- `name` ([String](../../sql-reference/data-types/string.md)) – Name of the data part.
+- `name` ([String](../../sql-reference/data-types/string.md)) – Name of the data part.
-- `part_type` ([String](../../sql-reference/data-types/string.md)) — The data part storing format.
+- `part_type` ([String](../../sql-reference/data-types/string.md)) — The data part storing format.
Possible Values:
- - `Wide` — Each column is stored in a separate file in a filesystem.
- - `Compact` — All columns are stored in one file in a filesystem.
+ - `Wide` — Each column is stored in a separate file in a filesystem.
+ - `Compact` — All columns are stored in one file in a filesystem.
Data storing format is controlled by the `min_bytes_for_wide_part` and `min_rows_for_wide_part` settings of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table.
- - `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging.
+- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging.
-- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint does not work for adaptive granularity).
+- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint does not work for adaptive granularity).
-- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The number of rows.
+- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The number of rows.
-- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of all the data part files in bytes. +- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of all the data part files in bytes. -- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included. +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included. -- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included. +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included. -- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks. +- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks. -- `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included. +- `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included. -- `secondary_indices_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included. +- `secondary_indices_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included. -- `secondary_indices_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks for secondary indices. +- `secondary_indices_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks for secondary indices. -- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The time the directory with the data part was modified. This usually corresponds to the time of data part creation. +- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The time the directory with the data part was modified. This usually corresponds to the time of data part creation. -- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The time when the data part became inactive. +- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The time when the data part became inactive. -- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) – The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges. +- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) – The number of places where the data part is used. 
A value greater than 2 indicates that the data part is used in queries or merges.
-- `min_date` ([Date](../../sql-reference/data-types/date.md)) – The minimum value of the date key in the data part.
+- `min_date` ([Date](../../sql-reference/data-types/date.md)) – The minimum value of the date key in the data part.
-- `max_date` ([Date](../../sql-reference/data-types/date.md)) – The maximum value of the date key in the data part.
+- `max_date` ([Date](../../sql-reference/data-types/date.md)) – The maximum value of the date key in the data part.
-- `min_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The minimum value of the date and time key in the data part.
+- `min_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The minimum value of the date and time key in the data part.
-- `max_time`([DateTime](../../sql-reference/data-types/datetime.md)) – The maximum value of the date and time key in the data part.
+- `max_time` ([DateTime](../../sql-reference/data-types/datetime.md)) – The maximum value of the date and time key in the data part.
-- `partition_id` ([String](../../sql-reference/data-types/string.md)) – ID of the partition.
+- `partition_id` ([String](../../sql-reference/data-types/string.md)) – ID of the partition.
-- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The minimum number of data parts that make up the current part after merging.
+- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The minimum number of data parts that make up the current part after merging.
-- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The maximum number of data parts that make up the current part after merging.
+- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The maximum number of data parts that make up the current part after merging.
-- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) – Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts.
+- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) – Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts.
-- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
+- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`).
-- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) used by primary key values.
+- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) used by primary key values.
-- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) reserved for primary key values.
+- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) reserved for primary key values.
-- `is_frozen` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup does not exist.
For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md/#alter_freeze-partition)
+- `is_frozen` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup does not exist. For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md/#alter_freeze-partition).
-- `database` ([String](../../sql-reference/data-types/string.md)) – Name of the database.
+- `database` ([String](../../sql-reference/data-types/string.md)) – Name of the database.
-- `table` ([String](../../sql-reference/data-types/string.md)) – Name of the table.
+- `table` ([String](../../sql-reference/data-types/string.md)) – Name of the table.
-- `engine` ([String](../../sql-reference/data-types/string.md)) – Name of the table engine without parameters.
+- `engine` ([String](../../sql-reference/data-types/string.md)) – Name of the table engine without parameters.
-- `path` ([String](../../sql-reference/data-types/string.md)) – Absolute path to the folder with data part files.
+- `path` ([String](../../sql-reference/data-types/string.md)) – Absolute path to the folder with data part files.
-- `disk_name` ([String](../../sql-reference/data-types/string.md)) – Name of a disk that stores the data part.
+- `disk_name` ([String](../../sql-reference/data-types/string.md)) – Name of a disk that stores the data part.
-- `hash_of_all_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of compressed files.
+- `hash_of_all_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of compressed files.
-- `hash_of_uncompressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of uncompressed files (files with marks, index file etc.).
+- `hash_of_uncompressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of uncompressed files (files with marks, index file, etc.).
-- `uncompressed_hash_of_compressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of data in the compressed files as if they were uncompressed.
+- `uncompressed_hash_of_compressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of data in the compressed files as if they were uncompressed.
-- `delete_ttl_info_min` ([DateTime](../../sql-reference/data-types/datetime.md)) — The minimum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
+- `delete_ttl_info_min` ([DateTime](../../sql-reference/data-types/datetime.md)) — The minimum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
-- `delete_ttl_info_max` ([DateTime](../../sql-reference/data-types/datetime.md)) — The maximum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
+- `delete_ttl_info_max` ([DateTime](../../sql-reference/data-types/datetime.md)) — The maximum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
-- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
+- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
:::note
The `move_ttl_info.expression` array is kept mostly for backward compatibility; now the simplest way to check a `TTL MOVE` rule is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
:::
-- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
+- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
-- `move_ttl_info.max` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the maximum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
+- `move_ttl_info.max` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the maximum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
-- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Alias for `bytes_on_disk`.
+- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Alias for `bytes_on_disk`.
-- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Alias for `marks_bytes`.
+- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Alias for `marks_bytes`.
**Example**
@@ -165,5 +165,5 @@ move_ttl_info.max: []
**See Also**
-- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
-- [TTL for Columns and Tables](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl)
+- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
+- [TTL for Columns and Tables](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl)
diff --git a/docs/en/operations/system-tables/parts_columns.md b/docs/en/operations/system-tables/parts_columns.md
index d389d5bbb29..00d7164af59 100644
--- a/docs/en/operations/system-tables/parts_columns.md
+++ b/docs/en/operations/system-tables/parts_columns.md
@@ -9,93 +9,93 @@ Each row describes one data part.
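+
+For orientation, a minimal query sketch that lists the largest columns on disk for one table, using the columns described below; the `default` database and `my_table` names are placeholders:
+
+```sql
+SELECT
+    column,
+    type,
+    column_bytes_on_disk
+FROM system.parts_columns
+WHERE database = 'default' AND table = 'my_table' AND active
+ORDER BY column_bytes_on_disk DESC;
+```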
Columns: -- `partition` ([String](../../sql-reference/data-types/string.md)) — The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query. +- `partition` ([String](../../sql-reference/data-types/string.md)) — The partition name. To learn what a partition is, see the description of the [ALTER](../../sql-reference/statements/alter/index.md#query_language_queries_alter) query. Formats: - - `YYYYMM` for automatic partitioning by month. - - `any_string` when partitioning manually. + - `YYYYMM` for automatic partitioning by month. + - `any_string` when partitioning manually. -- `name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part. +- `name` ([String](../../sql-reference/data-types/string.md)) — Name of the data part. -- `part_type` ([String](../../sql-reference/data-types/string.md)) — The data part storing format. +- `part_type` ([String](../../sql-reference/data-types/string.md)) — The data part storing format. Possible values: - - `Wide` — Each column is stored in a separate file in a filesystem. - - `Compact` — All columns are stored in one file in a filesystem. + - `Wide` — Each column is stored in a separate file in a filesystem. + - `Compact` — All columns are stored in one file in a filesystem. Data storing format is controlled by the `min_bytes_for_wide_part` and `min_rows_for_wide_part` settings of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table. -- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging. +- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging. -- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint does not work for adaptive granularity). +- `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint does not work for adaptive granularity). -- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows. +- `rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows. -- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of all the data part files in bytes. +- `bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of all the data part files in bytes. -- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included. +- `data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data in the data part. All the auxiliary files (for example, files with marks) are not included. -- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of uncompressed data in the data part. 
All the auxiliary files (for example, files with marks) are not included. +- `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included. -- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the file with marks. +- `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the file with marks. -- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time the directory with the data part was modified. This usually corresponds to the time of data part creation. +- `modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time the directory with the data part was modified. This usually corresponds to the time of data part creation. -- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the data part became inactive. +- `remove_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the data part became inactive. -- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges. +- `refcount` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of places where the data part is used. A value greater than 2 indicates that the data part is used in queries or merges. -- `min_date` ([Date](../../sql-reference/data-types/date.md)) — The minimum value of the date key in the data part. +- `min_date` ([Date](../../sql-reference/data-types/date.md)) — The minimum value of the date key in the data part. -- `max_date` ([Date](../../sql-reference/data-types/date.md)) — The maximum value of the date key in the data part. +- `max_date` ([Date](../../sql-reference/data-types/date.md)) — The maximum value of the date key in the data part. -- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition. +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition. -- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The minimum number of data parts that make up the current part after merging. +- `min_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The minimum number of data parts that make up the current part after merging. -- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The maximum number of data parts that make up the current part after merging. +- `max_block_number` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The maximum number of data parts that make up the current part after merging. -- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts. +- `level` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Depth of the merge tree. Zero means that the current part was created by insert rather than by merging other parts. -- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`). 
+- `data_version` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number that is used to determine which mutations should be applied to the data part (mutations with a version higher than `data_version`). -- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) used by primary key values. +- `primary_key_bytes_in_memory` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) used by primary key values. -- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) reserved for primary key values. +- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of memory (in bytes) reserved for primary key values. -- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database. +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database. -- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table. +- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table. -- `engine` ([String](../../sql-reference/data-types/string.md)) — Name of the table engine without parameters. +- `engine` ([String](../../sql-reference/data-types/string.md)) — Name of the table engine without parameters. -- `disk_name` ([String](../../sql-reference/data-types/string.md)) — Name of a disk that stores the data part. +- `disk_name` ([String](../../sql-reference/data-types/string.md)) — Name of a disk that stores the data part. -- `path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files. +- `path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the folder with data part files. -- `column` ([String](../../sql-reference/data-types/string.md)) — Name of the column. +- `column` ([String](../../sql-reference/data-types/string.md)) — Name of the column. -- `type` ([String](../../sql-reference/data-types/string.md)) — Column type. +- `type` ([String](../../sql-reference/data-types/string.md)) — Column type. -- `column_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1. +- `column_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1. -- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined. +- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined. -- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined. +- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined. -- `column_bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of the column in bytes. +- `column_bytes_on_disk` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of the column in bytes. -- `column_data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data in the column, in bytes. 
+- `column_data_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of compressed data in the column, in bytes. -- `column_data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of the decompressed data in the column, in bytes. +- `column_data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Total size of the decompressed data in the column, in bytes. -- `column_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the column with marks, in bytes. +- `column_marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The size of the column with marks, in bytes. -- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `bytes_on_disk`. +- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `bytes_on_disk`. -- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `marks_bytes`. +- `marks_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Alias for `marks_bytes`. **Example** @@ -146,4 +146,4 @@ column_marks_bytes: 48 **See Also** -- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) +- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) diff --git a/docs/en/operations/system-tables/processes.md b/docs/en/operations/system-tables/processes.md index 76219813ad7..2e729920ed0 100644 --- a/docs/en/operations/system-tables/processes.md +++ b/docs/en/operations/system-tables/processes.md @@ -7,17 +7,17 @@ This system table is used for implementing the `SHOW PROCESSLIST` query. Columns: -- `user` (String) – The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the `default` user. The field contains the username for a specific query, not for a query that this query initiated. -- `address` (String) – The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at `system.processes` on the query requestor server. -- `elapsed` (Float64) – The time in seconds since request execution started. -- `rows_read` (UInt64) – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. -- `bytes_read` (UInt64) – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers. -- `total_rows_approx` (UInt64) – The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known. -- `memory_usage` (UInt64) – Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting. -- `query` (String) – The query text. For `INSERT`, it does not include the data to insert. -- `query_id` (String) – Query ID, if defined. -- `is_cancelled` (Int8) – Was query cancelled. -- `is_all_data_sent` (Int8) – Was all data sent to the client (in other words query had been finished on the server). +- `user` (String) – The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the `default` user. 
The field contains the username for a specific query, not for a query that this query initiated.
+- `address` (String) – The IP address the request was made from. The same for distributed processing. To track where a distributed query was originally made from, look at `system.processes` on the query requestor server.
+- `elapsed` (Float64) – The time in seconds since request execution started.
+- `rows_read` (UInt64) – The number of rows read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
+- `bytes_read` (UInt64) – The number of uncompressed bytes read from the table. For distributed processing, on the requestor server, this is the total for all remote servers.
+- `total_rows_approx` (UInt64) – The approximation of the total number of rows that should be read. For distributed processing, on the requestor server, this is the total for all remote servers. It can be updated during request processing, when new sources to process become known.
+- `memory_usage` (UInt64) – Amount of RAM the request uses. It might not include some types of dedicated memory. See the [max_memory_usage](../../operations/settings/query-complexity.md#settings_max_memory_usage) setting.
+- `query` (String) – The query text. For `INSERT`, it does not include the data to insert.
+- `query_id` (String) – Query ID, if defined.
+- `is_cancelled` (Int8) – Was the query cancelled.
+- `is_all_data_sent` (Int8) – Was all data sent to the client (in other words, the query has finished on the server).
```sql
SELECT * FROM system.processes LIMIT 10 FORMAT Vertical;
diff --git a/docs/en/operations/system-tables/processors_profile_log.md b/docs/en/operations/system-tables/processors_profile_log.md
index a2e7a9ebabd..a6ff15642a1 100644
--- a/docs/en/operations/system-tables/processors_profile_log.md
+++ b/docs/en/operations/system-tables/processors_profile_log.md
@@ -1,24 +1,24 @@
-# system.processors_profile_log {#system-processors_profile_log}
+# processors_profile_log
This table contains profiling information at the processor level (which you can find in [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)).
Columns:
-- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened.
-- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened.
-- `id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of processor
-- `parent_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Parent processors IDs
-- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query
-- `name` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Name of the processor.
-- `elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was executed.
-- `input_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting for data (from other processor).
-- `output_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting because output port was full.
-- `plan_step` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the query plan step which created this processor. The value is zero if the processor was not added from any step.
-- `plan_group` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. Group is used only for beautifying the result of EXPLAIN PIPELINE result.
-- `input_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows consumed by processor.
-- `input_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes consumed by processor.
-- `output_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows generated by processor.
-- `output_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes generated by processor.
+- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened.
+- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened.
+- `id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the processor.
+- `parent_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Parent processors' IDs.
+- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
+- `name` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Name of the processor.
+- `elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds during which this processor was executing.
+- `input_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting for data (from another processor).
+- `output_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting because the output port was full.
+- `plan_step` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the query plan step which created this processor. The value is zero if the processor was not added from any step.
+- `plan_group` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Group of the processor if it was created by query plan step. A group is a logical partitioning of processors added from the same query plan step. A group is used only for beautifying the result of `EXPLAIN PIPELINE`.
+- `input_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows consumed by the processor.
+- `input_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes consumed by the processor.
+- `output_rows` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of rows generated by the processor.
+- `output_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of bytes generated by the processor.
**Example**
Query:
@@ -67,10 +67,10 @@ Result:
Here you can see:
-- `ExpressionTransform` was executing `sleep(1)` function, so it `work` will takes 1e6, and so `elapsed_us` > 1e6.
-- `SourceFromSingleChunk` need to wait, because `ExpressionTransform` does not accept any data during execution of `sleep(1)`, so it will be in `PortFull` state for 1e6 us, and so `output_wait_elapsed_us` > 1e6.
-- `LimitsCheckingTransform`/`NullSource`/`LazyOutputFormat` need to wait until `ExpressionTransform` will execute `sleep(1)` to process the result, so `input_wait_elapsed_us` > 1e6.
+- `ExpressionTransform` was executing the `sleep(1)` function, so its `work` takes about 1e6 microseconds, and so `elapsed_us` > 1e6.
+- `SourceFromSingleChunk` needs to wait, because `ExpressionTransform` does not accept any data during execution of `sleep(1)`, so it will be in the `PortFull` state for about 1e6 us, and so `output_wait_elapsed_us` > 1e6.
+- `LimitsCheckingTransform`/`NullSource`/`LazyOutputFormat` need to wait until `ExpressionTransform` finishes executing `sleep(1)` before they can process the result, so `input_wait_elapsed_us` > 1e6.
**See Also**
-- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)
\ No newline at end of file
+- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)
diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md
index a04214f6488..42247e6fba2 100644
--- a/docs/en/operations/system-tables/query_log.md
+++ b/docs/en/operations/system-tables/query_log.md
@@ -34,82 +34,82 @@ You can use the [log_formatted_queries](../../operations/settings/settings.md#se
Columns:
-- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of an event that occurred when executing the query. Values:
- - `'QueryStart' = 1` — Successful start of query execution.
- - `'QueryFinish' = 2` — Successful end of query execution.
- - `'ExceptionBeforeStart' = 3` — Exception before the start of query execution.
- - `'ExceptionWhileProcessing' = 4` — Exception during the query execution.
-- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Query starting date.
-- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time.
-- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time with microseconds precision.
-- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
-- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision.
-- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds.
-- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends it’s `read_rows` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
-- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of rows read at all replicas. Each replica sends it’s `read_bytes` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
-- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
-- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
-- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query. -- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result. -- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query. -- `current_database` ([String](../../sql-reference/data-types/string.md)) — Name of the current database. -- `query` ([String](../../sql-reference/data-types/string.md)) — Query string. -- `formatted_query` ([String](../../sql-reference/data-types/string.md)) — Formatted query string. -- `normalized_query_hash` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Identical hash value without the values of literals for similar queries. -- `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Type of the query. -- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the databases present in the query. -- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the tables present in the query. -- `views` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the (materialized or live) views present in the query. -- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the columns present in the query. -- `projections` ([String](../../sql-reference/data-types/string.md)) — Names of the projections used during the query execution. -- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception. -- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message. -- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string, if the query was completed successfully. -- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Query type. Possible values: - - 1 — Query was initiated by the client. - - 0 — Query was initiated by another query as part of distributed query execution. -- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query. -- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query. -- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the query. -- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). -- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). -- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. -- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the parent query. 
-- `initial_query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Initial query starting time (for distributed query execution). -- `initial_query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Initial query starting time with microseconds precision (for distributed query execution). -- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Interface that the query was initiated from. Possible values: - - 1 — TCP. - - 2 — HTTP. -- `os_user` ([String](../../sql-reference/data-types/string.md)) — Operating system username who runs [clickhouse-client](../../interfaces/cli.md). -- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run. -- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name. -- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. -- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. -- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. -- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version. -- `http_method` (UInt8) — HTTP method that initiated the query. Possible values: - - 0 — The query was launched from the TCP interface. - - 1 — `GET` method was used. - - 2 — `POST` method was used. -- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — HTTP header `UserAgent` passed in the HTTP query. -- `http_referer` ([String](../../sql-reference/data-types/string.md)) — HTTP header `Referer` passed in the HTTP query (contains an absolute or partial address of the page making the query). -- `forwarded_for` ([String](../../sql-reference/data-types/string.md)) — HTTP header `X-Forwarded-For` passed in the HTTP query. -- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The `quota key` specified in the [quotas](../../operations/quotas.md) setting (see `keyed`). -- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. -- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events) -- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1. -- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined. -- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution. 
-- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution.
-- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution.
-- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution.
-- `used_data_type_families` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `data type families`, which were used during query execution.
-- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries`, which were used during query execution.
-- `used_formats` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `formats`, which were used during query execution.
-- `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution.
-- `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution.
-- `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution.
+- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of an event that occurred when executing the query. Values:
+ - `'QueryStart' = 1` — Successful start of query execution.
+ - `'QueryFinish' = 2` — Successful end of query execution.
+ - `'ExceptionBeforeStart' = 3` — Exception before the start of query execution.
+ - `'ExceptionWhileProcessing' = 4` — Exception during the query execution.
+- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Query starting date.
+- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time.
+- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query starting time with microseconds precision.
+- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution.
+- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision.
+- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds.
+- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions that participated in the query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends its `read_rows` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
+- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions that participated in the query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_bytes` includes the total number of bytes read at all replicas.
Each replica sends its `read_bytes` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
+- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
+- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
+- `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in the result of a `SELECT` query, or the number of rows in an `INSERT` query.
+- `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result.
+- `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.
+- `current_database` ([String](../../sql-reference/data-types/string.md)) — Name of the current database.
+- `query` ([String](../../sql-reference/data-types/string.md)) — Query string.
+- `formatted_query` ([String](../../sql-reference/data-types/string.md)) — Formatted query string.
+- `normalized_query_hash` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — A hash value that is identical for queries that differ only in the values of literals.
+- `query_kind` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Type of the query.
+- `databases` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the databases present in the query.
+- `tables` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the tables present in the query.
+- `views` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the (materialized or live) views present in the query.
+- `columns` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — Names of the columns present in the query.
+- `projections` ([String](../../sql-reference/data-types/string.md)) — Names of the projections used during the query execution.
+- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception.
+- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message.
+- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string if the query completed successfully.
+- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Query type. Possible values:
+ - 1 — Query was initiated by the client.
+ - 0 — Query was initiated by another query as part of distributed query execution.
+- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query.
+- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
+- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query.
+- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the query.
+- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). +- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). +- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. +- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to make the parent query. +- `initial_query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Initial query starting time (for distributed query execution). +- `initial_query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Initial query starting time with microseconds precision (for distributed query execution). +- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Interface that the query was initiated from. Possible values: + - 1 — TCP. + - 2 — HTTP. +- `os_user` ([String](../../sql-reference/data-types/string.md)) — Operating system username who runs [clickhouse-client](../../interfaces/cli.md). +- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run. +- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name. +- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version. +- `http_method` (UInt8) — HTTP method that initiated the query. Possible values: + - 0 — The query was launched from the TCP interface. + - 1 — `GET` method was used. + - 2 — `POST` method was used. +- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — HTTP header `UserAgent` passed in the HTTP query. +- `http_referer` ([String](../../sql-reference/data-types/string.md)) — HTTP header `Referer` passed in the HTTP query (contains an absolute or partial address of the page making the query). +- `forwarded_for` ([String](../../sql-reference/data-types/string.md)) — HTTP header `X-Forwarded-For` passed in the HTTP query. +- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The `quota key` specified in the [quotas](../../operations/quotas.md) setting (see `keyed`). +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. +- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events) +- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — Settings that were changed when the client ran the query. 
To enable logging changes to settings, set the `log_query_settings` parameter to 1. +- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined. +- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution. +- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution. +- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution. +- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution. +- `used_data_type_families` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `data type families`, which were used during query execution. +- `used_dictionaries` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `dictionaries`, which were used during query execution. +- `used_formats` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `formats`, which were used during query execution. +- `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution. +- `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. +- `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution. **Example** @@ -189,4 +189,4 @@ used_table_functions: [] **See Also** -- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. +- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index d7bbaa63471..cdd23bb15db 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -18,52 +18,52 @@ You can use the [log_queries_probability](../../operations/settings/settings.md# Columns: -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query. -- `event_time_microsecinds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision. -- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution. -- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision. 
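As a minimal sketch of how the `query_log` columns above are typically queried (the one-day window and the 10-row limit are illustrative assumptions, not part of the documentation):

```sql
-- Ten slowest completed queries of the last day and the functions they used.
SELECT
    event_time,
    query_duration_ms,
    read_rows,
    memory_usage,
    used_functions
FROM system.query_log
WHERE type = 'QueryFinish'
  AND event_date >= today() - 1
ORDER BY query_duration_ms DESC
LIMIT 10;
```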
-- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution. -- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows. -- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes. -- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. -- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. -- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread. -- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread. -- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread. -- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID. -- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — thread ID. -- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS initial ID of initial thread. -- `query` ([String](../../sql-reference/data-types/string.md)) — Query string. -- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values: - - 1 — Query was initiated by the client. - - 0 — Query was initiated by another query for distributed query execution. -- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query. -- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query. -- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query. -- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). -- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). -- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. -- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query. -- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values: - - 1 — TCP. - - 2 — HTTP. -- `os_user` ([String](../../sql-reference/data-types/string.md)) — OS’s username who runs [clickhouse-client](../../interfaces/cli.md). -- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run. -- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name. 
-- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. -- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. -- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. -- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version. -- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values: - - 0 — The query was launched from the TCP interface. - - 1 — `GET` method was used. - - 2 — `POST` method was used. -- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request. -- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`). -- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision. -- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — ProfileEvents that measure different metrics for this thread. The description of them could be found in the table [system.events](#system_tables-events). +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query. +- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision. +- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of query execution. +- `query_start_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Start time of query execution with microsecond precision. +- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of query execution in milliseconds. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes. +- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0. +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0. +- `memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The difference between the amount of allocated and freed memory in context of this thread. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this thread. +- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Name of the thread. +- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Internal thread ID.
+- `thread_id` ([Int32](../../sql-reference/data-types/int-uint.md)) — Thread ID. +- `master_thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — OS ID of the initial thread. +- `query` ([String](../../sql-reference/data-types/string.md)) — Query string. +- `is_initial_query` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Query type. Possible values: + - 1 — Query was initiated by the client. + - 0 — Query was initiated by another query for distributed query execution. +- `user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who initiated the current query. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query. +- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that was used to make the query. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the query. +- `initial_user` ([String](../../sql-reference/data-types/string.md)) — Name of the user who ran the initial query (for distributed query execution). +- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). +- `initial_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address that the parent query was launched from. +- `initial_port` ([UInt16](../../sql-reference/data-types/int-uint.md#uint-ranges)) — The client port that was used to make the parent query. +- `interface` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Interface that the query was initiated from. Possible values: + - 1 — TCP. + - 2 — HTTP. +- `os_user` ([String](../../sql-reference/data-types/string.md)) — Operating system username under which [clickhouse-client](../../interfaces/cli.md) is run. +- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run. +- `client_name` ([String](../../sql-reference/data-types/string.md)) — The [clickhouse-client](../../interfaces/cli.md) or another TCP client name. +- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Major version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Minor version of the [clickhouse-client](../../interfaces/cli.md) or another TCP client. +- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the [clickhouse-client](../../interfaces/cli.md) or another TCP client version. +- `http_method` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — HTTP method that initiated the query. Possible values: + - 0 — The query was launched from the TCP interface. + - 1 — `GET` method was used. + - 2 — `POST` method was used. +- `http_user_agent` ([String](../../sql-reference/data-types/string.md)) — The `UserAgent` header passed in the HTTP request. +- `quota_key` ([String](../../sql-reference/data-types/string.md)) — The “quota key” specified in the [quotas](../../operations/quotas.md) setting (see `keyed`). +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse revision.
+- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics for this thread. Their descriptions can be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events). **Example** @@ -116,5 +116,5 @@ ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'Compr **See Also** -- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution. -- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — This table contains information about each view executed during a query. +- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about query execution. +- [system.query_views_log](../../operations/system-tables/query_views_log.md#system_tables-query_views_log) — This table contains information about each view executed during a query. diff --git a/docs/en/operations/system-tables/query_views_log.md b/docs/en/operations/system-tables/query_views_log.md index 55cab49e52f..e107e4f926c 100644 --- a/docs/en/operations/system-tables/query_views_log.md +++ b/docs/en/operations/system-tables/query_views_log.md @@ -18,33 +18,33 @@ You can use the [log_queries_probability](../../operations/settings/settings.md# Columns: -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the last event of the view happened. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the view finished execution. -- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the view finished execution with microseconds precision. -- `view_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of view execution (sum of its stages) in milliseconds. -- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). -- `view_name` ([String](../../sql-reference/data-types/string.md)) — Name of the view. -- `view_uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — UUID of the view. -- `view_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the view. Values: - - `'Default' = 1` — [Default views](../../sql-reference/statements/create/view.md#normal). Should not appear in this log. - - `'Materialized' = 2` — [Materialized views](../../sql-reference/statements/create/view.md#materialized). - - `'Live' = 3` — [Live views](../../sql-reference/statements/create/view.md#live-view). -- `view_query` ([String](../../sql-reference/data-types/string.md)) — The query executed by the view. -- `view_target` ([String](../../sql-reference/data-types/string.md)) — The name of the view target table. -- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows. -- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes. -- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of written rows. -- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of written bytes.
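A minimal sketch of reading per-thread counters from the `query_thread_log` `ProfileEvents` map described above (the query ID placeholder and the `OSCPUVirtualTimeMicroseconds` counter name are illustrative assumptions):

```sql
-- Per-thread CPU time for one query; individual counters in the
-- Map(String, UInt64) column can be read by key.
SELECT
    thread_id,
    thread_name,
    ProfileEvents['OSCPUVirtualTimeMicroseconds'] AS cpu_us
FROM system.query_thread_log
WHERE query_id = '<query_id>'  -- placeholder, substitute a real query ID
ORDER BY cpu_us DESC;
```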
-- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this view. -- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events). -- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the view. Values: - - `'QueryStart' = 1` — Successful start the view execution. Should not appear. - - `'QueryFinish' = 2` — Successful end of the view execution. - - `'ExceptionBeforeStart' = 3` — Exception before the start of the view execution. - - `'ExceptionWhileProcessing' = 4` — Exception during the view execution. -- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception. -- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message. -- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string, if the query was completed successfully. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the last event of the view happened. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the view finished execution. +- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the view finished execution with microseconds precision. +- `view_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Duration of view execution (sum of its stages) in milliseconds. +- `initial_query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the initial query (for distributed query execution). +- `view_name` ([String](../../sql-reference/data-types/string.md)) — Name of the view. +- `view_uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — UUID of the view. +- `view_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the view. Values: + - `'Default' = 1` — [Default views](../../sql-reference/statements/create/view.md#normal). Should not appear in this log. + - `'Materialized' = 2` — [Materialized views](../../sql-reference/statements/create/view.md#materialized). + - `'Live' = 3` — [Live views](../../sql-reference/statements/create/view.md#live-view). +- `view_query` ([String](../../sql-reference/data-types/string.md)) — The query executed by the view. +- `view_target` ([String](../../sql-reference/data-types/string.md)) — The name of the view target table. +- `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read rows. +- `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of read bytes. +- `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of written rows. +- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of written bytes. +- `peak_memory_usage` ([Int64](../../sql-reference/data-types/int-uint.md)) — The maximum difference between the amount of allocated and freed memory in context of this view. +- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics.
Their descriptions can be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events). +- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the view. Values: + - `'QueryStart' = 1` — Successful start of the view execution. Should not appear. + - `'QueryFinish' = 2` — Successful end of the view execution. + - `'ExceptionBeforeStart' = 3` — Exception before the start of the view execution. + - `'ExceptionWhileProcessing' = 4` — Exception during the view execution. +- `exception_code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code of an exception. +- `exception` ([String](../../sql-reference/data-types/string.md)) — Exception message. +- `stack_trace` ([String](../../sql-reference/data-types/string.md)) — [Stack trace](https://en.wikipedia.org/wiki/Stack_trace). An empty string if the query was completed successfully. **Example** @@ -83,5 +83,5 @@ stack_trace: **See Also** -- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution. -- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. +- [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about query execution. +- [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. diff --git a/docs/en/operations/system-tables/quota_usage.md b/docs/en/operations/system-tables/quota_usage.md index 7286ad9efa9..0dca7c525f2 100644 --- a/docs/en/operations/system-tables/quota_usage.md +++ b/docs/en/operations/system-tables/quota_usage.md @@ -30,4 +30,4 @@ Columns: ## See Also {#see-also} -- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) +- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) diff --git a/docs/en/operations/system-tables/quotas.md b/docs/en/operations/system-tables/quotas.md index ffe7a95df5b..a9748a2b464 100644 --- a/docs/en/operations/system-tables/quotas.md +++ b/docs/en/operations/system-tables/quotas.md @@ -25,4 +25,4 @@ Columns: ## See Also {#see-also} -- [SHOW QUOTAS](../../sql-reference/statements/show.md#show-quotas-statement) +- [SHOW QUOTAS](../../sql-reference/statements/show.md#show-quotas-statement) diff --git a/docs/en/operations/system-tables/quotas_usage.md b/docs/en/operations/system-tables/quotas_usage.md index e29ffff6b41..a04018ac2c8 100644 --- a/docs/en/operations/system-tables/quotas_usage.md +++ b/docs/en/operations/system-tables/quotas_usage.md @@ -33,4 +33,4 @@ Columns: ## See Also {#see-also} -- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) +- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) diff --git a/docs/en/operations/system-tables/replicas.md b/docs/en/operations/system-tables/replicas.md index e711d9a7784..122a03ca629 100644 --- a/docs/en/operations/system-tables/replicas.md +++ b/docs/en/operations/system-tables/replicas.md @@ -50,6 +50,7 @@ last_queue_update: 2021-10-12 14:50:08 absolute_delay: 99 total_replicas: 5 active_replicas: 5 +lost_part_count: 0 last_queue_update_exception: zookeeper_exception: replica_is_active: {'r1':1,'r2':1} @@
-57,42 +58,43 @@ replica_is_active: {'r1':1,'r2':1} Columns: -- `database` (`String`) - Database name -- `table` (`String`) - Table name -- `engine` (`String`) - Table engine name -- `is_leader` (`UInt8`) - Whether the replica is the leader. +- `database` (`String`) - Database name +- `table` (`String`) - Table name +- `engine` (`String`) - Table engine name +- `is_leader` (`UInt8`) - Whether the replica is the leader. Multiple replicas can be leaders at the same time. A replica can be prevented from becoming a leader using the `merge_tree` setting `replicated_can_become_leader`. The leaders are responsible for scheduling background merges. Note that writes can be performed to any replica that is available and has a session in ZK, regardless of whether it is a leader. -- `can_become_leader` (`UInt8`) - Whether the replica can be a leader. -- `is_readonly` (`UInt8`) - Whether the replica is in read-only mode. +- `can_become_leader` (`UInt8`) - Whether the replica can be a leader. +- `is_readonly` (`UInt8`) - Whether the replica is in read-only mode. This mode is turned on if the config does not have sections with ClickHouse Keeper, if an unknown error occurred when reinitializing sessions in ClickHouse Keeper, and during session reinitialization in ClickHouse Keeper. -- `is_session_expired` (`UInt8`) - the session with ClickHouse Keeper has expired. Basically the same as `is_readonly`. -- `future_parts` (`UInt32`) - The number of data parts that will appear as the result of INSERTs or merges that haven’t been done yet. +- `is_session_expired` (`UInt8`) - The session with ClickHouse Keeper has expired. Basically the same as `is_readonly`. +- `future_parts` (`UInt32`) - The number of data parts that will appear as the result of INSERTs or merges that haven’t been done yet.
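A minimal sketch of checking the replica-health flags described above (assuming a server with at least one replicated table):

```sql
-- Replicas that are read-only or have lost their ClickHouse Keeper session.
SELECT database, table, is_readonly, is_session_expired
FROM system.replicas
WHERE is_readonly OR is_session_expired;
```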
+- `parts_to_check` (`UInt32`) - The number of data parts in the queue for verification. A part is put in the verification queue if there is suspicion that it might be damaged. +- `zookeeper_path` (`String`) - Path to table data in ClickHouse Keeper. +- `replica_name` (`String`) - Replica name in ClickHouse Keeper. Different replicas of the same table have different names. +- `replica_path` (`String`) - Path to replica data in ClickHouse Keeper. The same as concatenating ‘zookeeper_path/replicas/replica_path’. +- `columns_version` (`Int32`) - Version number of the table structure. Indicates how many times ALTER was performed. If replicas have different versions, it means some replicas haven’t made all of the ALTERs yet. +- `queue_size` (`UInt32`) - Size of the queue for operations waiting to be performed. Operations include inserting blocks of data, merges, and certain other actions. It usually coincides with `future_parts`. +- `inserts_in_queue` (`UInt32`) - Number of inserts of blocks of data that need to be made. Insertions are usually replicated fairly quickly. If this number is large, it means something is wrong. +- `merges_in_queue` (`UInt32`) - The number of merges waiting to be made. Sometimes merges are lengthy, so this value may be greater than zero for a long time. +- `part_mutations_in_queue` (`UInt32`) - The number of mutations waiting to be made. +- `queue_oldest_time` (`DateTime`) - If `queue_size` is greater than 0, shows when the oldest operation was added to the queue. +- `inserts_oldest_time` (`DateTime`) - See `queue_oldest_time`. +- `merges_oldest_time` (`DateTime`) - See `queue_oldest_time`. +- `part_mutations_oldest_time` (`DateTime`) - See `queue_oldest_time`. The next 4 columns have a non-zero value only where there is an active session with ZK. -- `log_max_index` (`UInt64`) - Maximum entry number in the log of general activity. -- `log_pointer` (`UInt64`) - Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. If `log_pointer` is much smaller than `log_max_index`, something is wrong. -- `last_queue_update` (`DateTime`) - When the queue was updated last time. -- `absolute_delay` (`UInt64`) - How big lag in seconds the current replica has. -- `total_replicas` (`UInt8`) - The total number of known replicas of this table. -- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas). -- `last_queue_update_exception` (`String`) - When the queue contains broken entries. Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions. -- `zookeeper_exception` (`String`) - The last exception message, got if the error happened when fetching the info from ClickHouse Keeper. -- `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map between replica name and is replica active. +- `log_max_index` (`UInt64`) - Maximum entry number in the log of general activity. +- `log_pointer` (`UInt64`) - Maximum entry number in the log of general activity that the replica copied to its execution queue, plus one. If `log_pointer` is much smaller than `log_max_index`, something is wrong. +- `last_queue_update` (`DateTime`) - When the queue was updated last time. +- `absolute_delay` (`UInt64`) - How big a lag in seconds the current replica has. +- `total_replicas` (`UInt8`) - The total number of known replicas of this table.
+- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas). +- `lost_part_count` (`UInt64`) - The number of data parts lost in the table by all replicas in total since table creation. Value is persisted in ClickHouse Keeper and can only increase. +- `last_queue_update_exception` (`String`) - When the queue contains broken entries. Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions. +- `zookeeper_exception` (`String`) - The last exception message, received if an error happened when fetching the info from ClickHouse Keeper. +- `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map between replica name and whether the replica is active. If you request all the columns, the table may work a bit slowly, since several reads from ClickHouse Keeper are made for each row. If you do not request the last 4 columns (log_max_index, log_pointer, total_replicas, active_replicas), the table works quickly. diff --git a/docs/en/operations/system-tables/replicated_fetches.md b/docs/en/operations/system-tables/replicated_fetches.md index 9bd068e3c58..0f9be7bac83 100644 --- a/docs/en/operations/system-tables/replicated_fetches.md +++ b/docs/en/operations/system-tables/replicated_fetches.md @@ -7,37 +7,37 @@ Contains information about currently running background fetches. Columns: -- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database. +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database. -- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table. +- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table. -- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — The time elapsed (in seconds) since showing currently running background fetches started. +- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — The time elapsed (in seconds) since the currently running background fetch started. -- `progress` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of completed work from 0 to 1. +- `progress` ([Float64](../../sql-reference/data-types/float.md)) — The percentage of completed work from 0 to 1. -- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — The name of the part that will be formed as the result of showing currently running background fetches. +- `result_part_name` ([String](../../sql-reference/data-types/string.md)) — The name of the part that will be formed as the result of the currently running background fetch. -- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the part that will be formed as the result of showing currently running background fetches. +- `result_part_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the part that will be formed as the result of the currently running background fetch. -- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition. +- `partition_id` ([String](../../sql-reference/data-types/string.md)) — ID of the partition. -- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The total size (in bytes) of the compressed data in the result part.
+- `total_size_bytes_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The total size (in bytes) of the compressed data in the result part. -- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of compressed bytes read from the result part. +- `bytes_read_compressed` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of compressed bytes read from the result part. -- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the source replica. +- `source_replica_path` ([String](../../sql-reference/data-types/string.md)) — Absolute path to the source replica. -- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the source replica. +- `source_replica_hostname` ([String](../../sql-reference/data-types/string.md)) — Hostname of the source replica. -- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Port number of the source replica. +- `source_replica_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Port number of the source replica. -- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme. +- `interserver_scheme` ([String](../../sql-reference/data-types/string.md)) — Name of the interserver scheme. -- `URI` ([String](../../sql-reference/data-types/string.md)) — Uniform resource identifier. +- `URI` ([String](../../sql-reference/data-types/string.md)) — Uniform resource identifier. -- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the currently running background fetch is being performed using the `TO DETACHED` expression. +- `to_detached` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the currently running background fetch is being performed using the `TO DETACHED` expression. -- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier. **Example** @@ -68,4 +68,4 @@ thread_id: 54 **See Also** -- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md/#managing-replicatedmergetree-tables) +- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md/#managing-replicatedmergetree-tables) diff --git a/docs/en/operations/system-tables/replication_queue.md b/docs/en/operations/system-tables/replication_queue.md index dcc61b8e6a7..dd8f6328688 100644 --- a/docs/en/operations/system-tables/replication_queue.md +++ b/docs/en/operations/system-tables/replication_queue.md @@ -7,55 +7,55 @@ Contains information about tasks from replication queues stored in ClickHouse Ke Columns: -- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database. +- `database` ([String](../../sql-reference/data-types/string.md)) — Name of the database. -- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table. +- `table` ([String](../../sql-reference/data-types/string.md)) — Name of the table. -- `replica_name` ([String](../../sql-reference/data-types/string.md)) — Replica name in ClickHouse Keeper. Different replicas of the same table have different names. +- `replica_name` ([String](../../sql-reference/data-types/string.md)) — Replica name in ClickHouse Keeper. Different replicas of the same table have different names. 
-- `position` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Position of the task in the queue. +- `position` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Position of the task in the queue. -- `node_name` ([String](../../sql-reference/data-types/string.md)) — Node name in ClickHouse Keeper. +- `node_name` ([String](../../sql-reference/data-types/string.md)) — Node name in ClickHouse Keeper. -- `type` ([String](../../sql-reference/data-types/string.md)) — Type of the task in the queue, one of: +- `type` ([String](../../sql-reference/data-types/string.md)) — Type of the task in the queue, one of: - - `GET_PART` — Get the part from another replica. - - `ATTACH_PART` — Attach the part, possibly from our own replica (if found in the `detached` folder). You may think of it as a `GET_PART` with some optimizations as they're nearly identical. - - `MERGE_PARTS` — Merge the parts. - - `DROP_RANGE` — Delete the parts in the specified partition in the specified number range. - - `CLEAR_COLUMN` — NOTE: Deprecated. Drop specific column from specified partition. - - `CLEAR_INDEX` — NOTE: Deprecated. Drop specific index from specified partition. - - `REPLACE_RANGE` — Drop a certain range of parts and replace them with new ones. - - `MUTATE_PART` — Apply one or several mutations to the part. - - `ALTER_METADATA` — Apply alter modification according to global /metadata and /columns paths. + - `GET_PART` — Get the part from another replica. + - `ATTACH_PART` — Attach the part, possibly from our own replica (if found in the `detached` folder). You may think of it as a `GET_PART` with some optimizations as they're nearly identical. + - `MERGE_PARTS` — Merge the parts. + - `DROP_RANGE` — Delete the parts in the specified partition in the specified number range. + - `CLEAR_COLUMN` — NOTE: Deprecated. Drop specific column from specified partition. + - `CLEAR_INDEX` — NOTE: Deprecated. Drop specific index from specified partition. + - `REPLACE_RANGE` — Drop a certain range of parts and replace them with new ones. + - `MUTATE_PART` — Apply one or several mutations to the part. + - `ALTER_METADATA` — Apply alter modification according to global /metadata and /columns paths. -- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was submitted for execution. +- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was submitted for execution. -- `required_quorum` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of replicas waiting for the task to complete with confirmation of completion. This column is only relevant for the `GET_PARTS` task. +- `required_quorum` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of replicas waiting for the task to complete with confirmation of completion. This column is only relevant for the `GET_PARTS` task. -- `source_replica` ([String](../../sql-reference/data-types/string.md)) — Name of the source replica. +- `source_replica` ([String](../../sql-reference/data-types/string.md)) — Name of the source replica. -- `new_part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the new part. +- `new_part_name` ([String](../../sql-reference/data-types/string.md)) — Name of the new part. -- `parts_to_merge` ([Array](../../sql-reference/data-types/array.md) ([String](../../sql-reference/data-types/string.md))) — Names of parts to merge or update. 
+- `parts_to_merge` ([Array](../../sql-reference/data-types/array.md) ([String](../../sql-reference/data-types/string.md))) — Names of parts to merge or update. -- `is_detach` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the `DETACH_PARTS` task is in the queue. +- `is_detach` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether the `DETACH_PARTS` task is in the queue. -- `is_currently_executing` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether a specific task is being performed right now. +- `is_currently_executing` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag indicates whether a specific task is being performed right now. -- `num_tries` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of failed attempts to complete the task. +- `num_tries` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of failed attempts to complete the task. -- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text message about the last error that occurred (if any). +- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text message about the last error that occurred (if any). -- `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last attempted. +- `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last attempted. -- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of postponed tasks. +- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of postponed tasks. -- `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — The reason why the task was postponed. +- `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — The reason why the task was postponed. -- `last_postpone_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last postponed. +- `last_postpone_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last postponed. -- `merge_type` ([String](../../sql-reference/data-types/string.md)) — Type of the current merge. Empty if it's a mutation. +- `merge_type` ([String](../../sql-reference/data-types/string.md)) — Type of the current merge. Empty if it's a mutation. **Example** @@ -89,4 +89,4 @@ last_postpone_time: 1970-01-01 03:00:00 **See Also** -- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md#query-language-system-replicated) +- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md#query-language-system-replicated) diff --git a/docs/en/operations/system-tables/role-grants.md b/docs/en/operations/system-tables/role-grants.md index 22f69fce7fb..ffe1f1d74e2 100644 --- a/docs/en/operations/system-tables/role-grants.md +++ b/docs/en/operations/system-tables/role-grants.md @@ -14,9 +14,9 @@ Columns: - `granted_role_name` ([String](../../sql-reference/data-types/string.md)) — Name of role granted to the `role_name` role. To grant one role to another one use `GRANT role1 TO role2`. - `granted_role_is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `granted_role` is a default role. Possible values: - - 1 — `granted_role` is a default role. - - 0 — `granted_role` is not a default role. 
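For the `replication_queue` columns documented above, a minimal sketch of spotting stuck tasks (the retry threshold of 3 is an illustrative assumption):

```sql
-- Queue tasks that keep failing, with the last error and postpone reason.
SELECT database, table, type, num_tries, last_exception, postpone_reason
FROM system.replication_queue
WHERE num_tries > 3
ORDER BY num_tries DESC;
```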
+ - 1 — `granted_role` is a default role. + - 0 — `granted_role` is not a default role. - `with_admin_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `granted_role` is a role with [ADMIN OPTION](../../sql-reference/statements/grant.md#admin-option-privilege) privilege. Possible values: - - 1 — The role has `ADMIN OPTION` privilege. - - 0 — The role without `ADMIN OPTION` privilege. + - 1 — The role has `ADMIN OPTION` privilege. + - 0 — The role does not have the `ADMIN OPTION` privilege. diff --git a/docs/en/operations/system-tables/roles.md b/docs/en/operations/system-tables/roles.md index 5ef5e765c0f..1614e0580b8 100644 --- a/docs/en/operations/system-tables/roles.md +++ b/docs/en/operations/system-tables/roles.md @@ -13,4 +13,4 @@ Columns: ## See Also {#see-also} -- [SHOW ROLES](../../sql-reference/statements/show.md#show-roles-statement) +- [SHOW ROLES](../../sql-reference/statements/show.md#show-roles-statement) diff --git a/docs/en/operations/system-tables/row_policies.md b/docs/en/operations/system-tables/row_policies.md index b21a9500825..2c4d060ce66 100644 --- a/docs/en/operations/system-tables/row_policies.md +++ b/docs/en/operations/system-tables/row_policies.md @@ -6,30 +6,30 @@ slug: /en/operations/system-tables/row_policies Contains filters for one particular table, as well as a list of roles and/or users which should use this row policy. Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — Name of a row policy. +- `name` ([String](../../sql-reference/data-types/string.md)) — Name of a row policy. -- `short_name` ([String](../../sql-reference/data-types/string.md)) — Short name of a row policy. Names of row policies are compound, for example: myfilter ON mydb.mytable. Here "myfilter ON mydb.mytable" is the name of the row policy, "myfilter" is it's short name. +- `short_name` ([String](../../sql-reference/data-types/string.md)) — Short name of a row policy. Names of row policies are compound, for example: myfilter ON mydb.mytable. Here "myfilter ON mydb.mytable" is the name of the row policy, "myfilter" is its short name. -- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. +- `database` ([String](../../sql-reference/data-types/string.md)) — Database name. -- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `table` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Row policy ID. +- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Row policy ID. -- `storage` ([String](../../sql-reference/data-types/string.md)) — Name of the directory where the row policy is stored. +- `storage` ([String](../../sql-reference/data-types/string.md)) — Name of the directory where the row policy is stored. -- `select_filter` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Condition which is used to filter rows. +- `select_filter` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Condition which is used to filter rows. -- `is_restrictive` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the row policy restricts access to rows, see [CREATE ROW POLICY](../../sql-reference/statements/create/row-policy.md#create-row-policy-as).
Value: +- `is_restrictive` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the row policy restricts access to rows, see [CREATE ROW POLICY](../../sql-reference/statements/create/row-policy.md#create-row-policy-as). Value: - `0` — The row policy is defined with `AS PERMISSIVE` clause. - `1` — The row policy is defined with `AS RESTRICTIVE` clause. -- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows that the row policies set for all roles and/or users. +- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the row policy is set for all roles and/or users. -- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of the roles and/or users to which the row policies is applied. +- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of the roles and/or users to which the row policy is applied. -- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — The row policies is applied to all roles and/or users excepting of the listed ones. +- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — The row policy is applied to all roles and/or users except the listed ones. ## See Also {#see-also} -- [SHOW POLICIES](../../sql-reference/statements/show.md#show-policies-statement) +- [SHOW POLICIES](../../sql-reference/statements/show.md#show-policies-statement) diff --git a/docs/en/operations/system-tables/schema_inference_cache.md b/docs/en/operations/system-tables/schema_inference_cache.md index 8a65f29bc30..8624ee9cec9 100644 --- a/docs/en/operations/system-tables/schema_inference_cache.md +++ b/docs/en/operations/system-tables/schema_inference_cache.md @@ -66,4 +66,4 @@ schema: id Nullable(Float64), age Nullable(Float64), name Nullab **See also** -- [Automatic schema inference from input data](/docs/en/interfaces/schema-inference.md) +- [Automatic schema inference from input data](/docs/en/interfaces/schema-inference.md) diff --git a/docs/en/operations/system-tables/server_settings.md b/docs/en/operations/system-tables/server_settings.md index e1bf8c3d63f..3085b1acaf4 100644 --- a/docs/en/operations/system-tables/server_settings.md +++ b/docs/en/operations/system-tables/server_settings.md @@ -8,12 +8,12 @@ Currently, the table shows only settings from the first layer of `config.xml` an Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — Server setting name. -- `value` ([String](../../sql-reference/data-types/string.md)) — Server setting value. -- `default` ([String](../../sql-reference/data-types/string.md)) — Server setting default value. -- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml` -- `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description. -- `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type. +- `name` ([String](../../sql-reference/data-types/string.md)) — Server setting name. +- `value` ([String](../../sql-reference/data-types/string.md)) — Server setting value. +- `default` ([String](../../sql-reference/data-types/string.md)) — Server setting default value.
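For the `server_settings` columns being documented here, a minimal sketch of listing the settings that were explicitly set in `config.xml`:

```sql
-- Server-level settings that differ from their compiled-in defaults.
SELECT name, value, description
FROM system.server_settings
WHERE changed;
```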
+- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml` +- `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description. +- `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type. **Example** @@ -47,6 +47,6 @@ SELECT * FROM system.server_settings WHERE changed AND name='max_thread_pool_siz **See also** -- [Settings](../../operations/system-tables/settings.md) -- [Configuration Files](../../operations/configuration-files.md) -- [Server Settings](../../operations/server-configuration-parameters/settings.md) +- [Settings](../../operations/system-tables/settings.md) +- [Configuration Files](../../operations/configuration-files.md) +- [Server Settings](../../operations/server-configuration-parameters/settings.md) diff --git a/docs/en/operations/system-tables/session_log.md b/docs/en/operations/system-tables/session_log.md index cdf86b57ef6..661d34677e4 100644 --- a/docs/en/operations/system-tables/session_log.md +++ b/docs/en/operations/system-tables/session_log.md @@ -7,42 +7,42 @@ Contains information about all successful and failed login and logout events. Columns: -- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Login/logout result. Possible values: - - `LoginFailure` — Login error. - - `LoginSuccess` — Successful login. - - `Logout` — Logout from the system. -- `auth_id` ([UUID](../../sql-reference/data-types/uuid.md)) — Authentication ID, which is a UUID that is automatically generated each time user logins. -- `session_id` ([String](../../sql-reference/data-types/string.md)) — Session ID that is passed by client via [HTTP](../../interfaces/http.md) interface. -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Login/logout date. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Login/logout time. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Login/logout starting time with microseconds precision. -- `user` ([String](../../sql-reference/data-types/string.md)) — User name. -- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)) — The authentication type. Possible values: - - `NO_PASSWORD` - - `PLAINTEXT_PASSWORD` - - `SHA256_PASSWORD` - - `DOUBLE_SHA1_PASSWORD` - - `LDAP` - - `KERBEROS` - - `SSL_CERTIFICATE` -- `profiles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of profiles set for all roles and/or users. -- `roles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of roles to which the profile is applied. -- `settings` ([Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md), [String](../../sql-reference/data-types/string.md)))) — Settings that were changed when the client logged in/out. -- `client_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — The IP address that was used to log in/out. -- `client_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to log in/out. -- `interface` ([Enum8](../../sql-reference/data-types/enum.md)) — The interface from which the login was initiated. 
Possible values: - - `TCP` - - `HTTP` - - `gRPC` - - `MySQL` - - `PostgreSQL` -- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — The hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run. -- `client_name` ([String](../../sql-reference/data-types/string.md)) — The `clickhouse-client` or another TCP client name. -- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the `clickhouse-client` or another TCP client. -- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The major version of the `clickhouse-client` or another TCP client. -- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The minor version of the `clickhouse-client` or another TCP client. -- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the `clickhouse-client` or another TCP client version. -- `failure_reason` ([String](../../sql-reference/data-types/string.md)) — The exception message containing the reason for the login/logout failure. +- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Login/logout result. Possible values: + - `LoginFailure` — Login error. + - `LoginSuccess` — Successful login. + - `Logout` — Logout from the system. +- `auth_id` ([UUID](../../sql-reference/data-types/uuid.md)) — Authentication ID, which is a UUID that is automatically generated each time a user logs in. +- `session_id` ([String](../../sql-reference/data-types/string.md)) — Session ID that is passed by the client via the [HTTP](../../interfaces/http.md) interface. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Login/logout date. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Login/logout time. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Login/logout starting time with microseconds precision. +- `user` ([String](../../sql-reference/data-types/string.md)) — User name. +- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)) — The authentication type. Possible values: + - `NO_PASSWORD` + - `PLAINTEXT_PASSWORD` + - `SHA256_PASSWORD` + - `DOUBLE_SHA1_PASSWORD` + - `LDAP` + - `KERBEROS` + - `SSL_CERTIFICATE` +- `profiles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of profiles set for all roles and/or users. +- `roles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of roles to which the profile is applied. +- `settings` ([Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md), [String](../../sql-reference/data-types/string.md)))) — Settings that were changed when the client logged in/out. +- `client_address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — The IP address that was used to log in/out. +- `client_port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The client port that was used to log in/out. +- `interface` ([Enum8](../../sql-reference/data-types/enum.md)) — The interface from which the login was initiated.
Possible values: + - `TCP` + - `HTTP` + - `gRPC` + - `MySQL` + - `PostgreSQL` +- `client_hostname` ([String](../../sql-reference/data-types/string.md)) — The hostname of the client machine where the [clickhouse-client](../../interfaces/cli.md) or another TCP client is run. +- `client_name` ([String](../../sql-reference/data-types/string.md)) — The `clickhouse-client` or another TCP client name. +- `client_revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Revision of the `clickhouse-client` or another TCP client. +- `client_version_major` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The major version of the `clickhouse-client` or another TCP client. +- `client_version_minor` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The minor version of the `clickhouse-client` or another TCP client. +- `client_version_patch` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Patch component of the `clickhouse-client` or another TCP client version. +- `failure_reason` ([String](../../sql-reference/data-types/string.md)) — The exception message containing the reason for the login/logout failure. **Example** diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md index a3dfa937abe..afae45077cc 100644 --- a/docs/en/operations/system-tables/settings.md +++ b/docs/en/operations/system-tables/settings.md @@ -7,16 +7,16 @@ Contains information about session settings for current user. Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name. -- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value. -- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is changed from its default value. -- `description` ([String](../../sql-reference/data-types/string.md)) — Short setting description. -- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal). -- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal). -- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting: - - `0` — Current user can change the setting. - - `1` — Current user can’t change the setting. -- `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value. +- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name. +- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value. +- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting is changed from its default value. +- `description` ([String](../../sql-reference/data-types/string.md)) — Short setting description. 
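As a quick illustration of the `system.session_log` columns documented above, a minimal query sketch (the table is only populated when `session_log` is enabled in the server configuration; output shape may vary by version):

```sql
-- Most recent failed login attempts, with the reason for each failure.
SELECT event_time, user, interface, client_address, failure_reason
FROM system.session_log
WHERE type = 'LoginFailure'
ORDER BY event_time DESC
LIMIT 10;
```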
+- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal). +- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal). +- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting: + - `0` — Current user can change the setting. + - `1` — Current user can’t change the setting. +- `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value. **Example** @@ -38,8 +38,8 @@ WHERE name LIKE '%min_i%' Using of `WHERE changed` can be useful, for example, when you want to check: -- Whether settings in configuration files are loaded correctly and are in use. -- Settings that changed in the current session. +- Whether settings in configuration files are loaded correctly and are in use. +- Settings that changed in the current session. @@ -49,7 +49,7 @@ SELECT * FROM system.settings WHERE changed AND name='load_balancing' **See also** -- [Settings](../../operations/settings/index.md#session-settings-intro) -- [Permissions for Queries](../../operations/settings/permissions-for-queries.md#settings_readonly) -- [Constraints on Settings](../../operations/settings/constraints-on-settings.md) -- [SHOW SETTINGS](../../sql-reference/statements/show.md#show-settings) statement +- [Settings](../../operations/settings/index.md#session-settings-intro) +- [Permissions for Queries](../../operations/settings/permissions-for-queries.md#settings_readonly) +- [Constraints on Settings](../../operations/settings/constraints-on-settings.md) +- [SHOW SETTINGS](../../sql-reference/statements/show.md#show-settings) statement diff --git a/docs/en/operations/system-tables/settings_profile_elements.md b/docs/en/operations/system-tables/settings_profile_elements.md index f4c77dfc76a..c1fc562e1e9 100644 --- a/docs/en/operations/system-tables/settings_profile_elements.md +++ b/docs/en/operations/system-tables/settings_profile_elements.md @@ -10,22 +10,22 @@ Describes the content of the settings profile: - Parent settings profiles. Columns: -- `profile_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting profile name. +- `profile_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting profile name. -- `user_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — User name. +- `user_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — User name. -- `role_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Role name. +- `role_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Role name. 
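For `system.settings`, the `min`, `max`, and `readonly` columns combine naturally with the `WHERE changed` pattern the page recommends; a minimal sketch:

```sql
-- Non-default session settings together with any constraints on them.
SELECT name, value, min, max, readonly
FROM system.settings
WHERE changed;
```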
-- `index` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Sequential number of the settings profile element. +- `index` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Sequential number of the settings profile element. -- `setting_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting name. +- `setting_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting name. -- `value` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting value. +- `value` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Setting value. -- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The minimum value of the setting. `NULL` if not set. +- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The minimum value of the setting. `NULL` if not set. -- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The maximum value of the setting. NULL if not set. +- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The maximum value of the setting. NULL if not set. -- `readonly` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges))) — Profile that allows only read queries. +- `readonly` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges))) — Profile that allows only read queries. -- `inherit_profile` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — A parent profile for this setting profile. `NULL` if not set. Setting profile will inherit all the settings' values and constraints (`min`, `max`, `readonly`) from its parent profiles. +- `inherit_profile` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — A parent profile for this setting profile. `NULL` if not set. Setting profile will inherit all the settings' values and constraints (`min`, `max`, `readonly`) from its parent profiles. diff --git a/docs/en/operations/system-tables/settings_profiles.md b/docs/en/operations/system-tables/settings_profiles.md index ef9068ae1b8..635a4e47dfd 100644 --- a/docs/en/operations/system-tables/settings_profiles.md +++ b/docs/en/operations/system-tables/settings_profiles.md @@ -6,20 +6,20 @@ slug: /en/operations/system-tables/settings_profiles Contains properties of configured setting profiles. Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — Setting profile name. +- `name` ([String](../../sql-reference/data-types/string.md)) — Setting profile name. -- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Setting profile ID. +- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — Setting profile ID. -- `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of setting profiles. Configured in the `access_control_path` parameter. +- `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of setting profiles. Configured in the `access_control_path` parameter. 
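To see how `system.settings_profile_elements` lays out a profile's entries, a small sketch using only the columns documented above (`index` is quoted defensively, since `INDEX` is a keyword elsewhere in the grammar):

```sql
-- Per-profile settings, in the order the elements are defined.
SELECT profile_name, `index`, setting_name, value, inherit_profile
FROM system.settings_profile_elements
WHERE profile_name IS NOT NULL
ORDER BY profile_name, `index`;
```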
-- `num_elements` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of elements for this profile in the `system.settings_profile_elements` table. +- `num_elements` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of elements for this profile in the `system.settings_profile_elements` table. -- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows that the settings profile set for all roles and/or users. +- `apply_to_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows that the settings profile set for all roles and/or users. -- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of the roles and/or users to which the setting profile is applied. +- `apply_to_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of the roles and/or users to which the setting profile is applied. -- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — The setting profile is applied to all roles and/or users excepting of the listed ones. +- `apply_to_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — The setting profile is applied to all roles and/or users excepting of the listed ones. ## See Also {#see-also} -- [SHOW PROFILES](../../sql-reference/statements/show.md#show-profiles-statement) +- [SHOW PROFILES](../../sql-reference/statements/show.md#show-profiles-statement) diff --git a/docs/en/operations/system-tables/stack_trace.md b/docs/en/operations/system-tables/stack_trace.md index d6963160399..52ee7088597 100644 --- a/docs/en/operations/system-tables/stack_trace.md +++ b/docs/en/operations/system-tables/stack_trace.md @@ -9,10 +9,10 @@ To analyze stack frames, use the `addressToLine`, `addressToLineWithInlines`, `a Columns: -- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Thread name. -- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier. -- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](../system-tables/query_log.md) system table. -- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored. +- `thread_name` ([String](../../sql-reference/data-types/string.md)) — Thread name. +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](../system-tables/query_log.md) system table. +- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) which represents a list of physical addresses where the called methods are stored. :::tip Check out the Knowledge Base for some handy queries, including [how to see what threads are currently running](https://clickhouse.com/docs/knowledgebase/find-expensive-queries) and [useful queries for troubleshooting](https://clickhouse.com/docs/knowledgebase/useful-queries-for-troubleshooting). 
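The introspection functions named at the top of the `stack_trace` section can turn the raw `trace` addresses into symbol names; a sketch (requires `allow_introspection_functions = 1` and a server binary with symbols available):

```sql
SET allow_introspection_functions = 1;

-- One thread's current stack, symbolized and joined into a readable string.
SELECT
    thread_name,
    thread_id,
    query_id,
    arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS stack
FROM system.stack_trace
LIMIT 1;
```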
@@ -92,7 +92,7 @@ res: /lib/x86_64-linux-gnu/libc-2.27.so **See Also** -- [Introspection Functions](../../sql-reference/functions/introspection.md) — Which introspection functions are available and how to use them. -- [system.trace_log](../system-tables/trace_log.md) — Contains stack traces collected by the sampling query profiler. -- [arrayMap](../../sql-reference/functions/array-functions.md#array-map) — Description and usage example of the `arrayMap` function. -- [arrayFilter](../../sql-reference/functions/array-functions.md#array-filter) — Description and usage example of the `arrayFilter` function. +- [Introspection Functions](../../sql-reference/functions/introspection.md) — Which introspection functions are available and how to use them. +- [system.trace_log](../system-tables/trace_log.md) — Contains stack traces collected by the sampling query profiler. +- [arrayMap](../../sql-reference/functions/array-functions.md#array-map) — Description and usage example of the `arrayMap` function. +- [arrayFilter](../../sql-reference/functions/array-functions.md#array-filter) — Description and usage example of the `arrayFilter` function. diff --git a/docs/en/operations/system-tables/storage_policies.md b/docs/en/operations/system-tables/storage_policies.md index 966b677c7e3..5c7184b2b22 100644 --- a/docs/en/operations/system-tables/storage_policies.md +++ b/docs/en/operations/system-tables/storage_policies.md @@ -7,12 +7,20 @@ Contains information about storage policies and volumes defined in the [server c Columns: -- `policy_name` ([String](../../sql-reference/data-types/string.md)) — Name of the storage policy. -- `volume_name` ([String](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy. -- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration, the data fills the volumes according this priority, i.e. data during inserts and merges is written to volumes with a lower priority (taking into account other rules: TTL, `max_data_part_size`, `move_factor`). -- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy. -- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit). -- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of configuration parameter, ClickHouse start to move data to the next volume in order. -- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `prefer_not_to_merge` setting. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. +- `policy_name` ([String](../../sql-reference/data-types/string.md)) — Name of the storage policy. +- `volume_name` ([String](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy. +- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration, the data fills the volumes according to this priority, i.e. data during inserts and merges is written to volumes with a lower priority (taking into account other rules: TTL, `max_data_part_size`, `move_factor`). +- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy.
+- `volume_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of volume. Can have one of the following values: + - `JBOD` + - `SINGLE_DISK` + - `UNKNOWN` +- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit). +- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of the configuration parameter, ClickHouse starts to move data to the next volume in order. +- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `prefer_not_to_merge` setting. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks. +- `perform_ttl_move_on_insert` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `perform_ttl_move_on_insert` setting, which disables TTL move on data part INSERT. By default, if you insert a data part that has already expired according to the TTL move rule, it immediately goes to the volume/disk declared in the move rule. This can significantly slow down inserts when the destination volume/disk is slow (e.g. S3). +- `load_balancing` ([Enum8](../../sql-reference/data-types/enum.md)) — Policy for disk balancing. Can have one of the following values: + - `ROUND_ROBIN` + - `LEAST_USED` If the storage policy contains more then one volume, then information for each volume is stored in the individual row of the table. diff --git a/docs/en/operations/system-tables/table_engines.md b/docs/en/operations/system-tables/table_engines.md index 5178f1640c7..08594739ecf 100644 --- a/docs/en/operations/system-tables/table_engines.md +++ b/docs/en/operations/system-tables/table_engines.md @@ -7,14 +7,14 @@ Contains description of table engines supported by server and their feature supp This table contains the following columns (the column type is shown in brackets): -- `name` (String) — The name of table engine. -- `supports_settings` (UInt8) — Flag that indicates if table engine supports `SETTINGS` clause. -- `supports_skipping_indices` (UInt8) — Flag that indicates if table engine supports [skipping indices](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes). -- `supports_ttl` (UInt8) — Flag that indicates if table engine supports [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). -- `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`. -- `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../../engines/table-engines/mergetree-family/replication.md). -- `supports_duduplication` (UInt8) — Flag that indicates if table engine supports data deduplication. -- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#settings-max-insert-threads) setting). +- `name` (String) — The name of the table engine. +- `supports_settings` (UInt8) — Flag that indicates if table engine supports `SETTINGS` clause. +- `supports_skipping_indices` (UInt8) — Flag that indicates if table engine supports [skipping indices](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes).
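With the newly documented `volume_type` and `load_balancing` columns, an overview query for `system.storage_policies` might look like the following sketch (the two new columns exist only on versions that include this change):

```sql
-- One row per (policy, volume) pair, with the knobs that govern data placement.
SELECT policy_name, volume_name, volume_type, disks, max_data_part_size, move_factor, load_balancing
FROM system.storage_policies;
```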
+- `supports_ttl` (UInt8) — Flag that indicates if table engine supports [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). +- `supports_sort_order` (UInt8) — Flag that indicates if table engine supports clauses `PARTITION_BY`, `PRIMARY_KEY`, `ORDER_BY` and `SAMPLE_BY`. +- `supports_replication` (UInt8) — Flag that indicates if table engine supports [data replication](../../engines/table-engines/mergetree-family/replication.md). +- `supports_deduplication` (UInt8) — Flag that indicates if table engine supports data deduplication. +- `supports_parallel_insert` (UInt8) — Flag that indicates if table engine supports parallel insert (see [`max_insert_threads`](../../operations/settings/settings.md#settings-max-insert-threads) setting). Example: @@ -34,6 +34,6 @@ WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree') **See also** -- MergeTree family [query clauses](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses) -- Kafka [settings](../../engines/table-engines/integrations/kafka.md#table_engine-kafka-creating-a-table) -- Join [settings](../../engines/table-engines/special/join.md#join-limitations-and-settings) +- MergeTree family [query clauses](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses) +- Kafka [settings](../../engines/table-engines/integrations/kafka.md#table_engine-kafka-creating-a-table) +- Join [settings](../../engines/table-engines/special/join.md#join-limitations-and-settings) diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index 497e23dd7ca..82e9fa206ea 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -11,67 +11,67 @@ Contains metadata of each table that the server knows about. Columns: -- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in. +- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the table is in. -- `name` ([String](../../sql-reference/data-types/string.md)) — Table name. +- `name` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid (Atomic database). +- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid (Atomic database). -- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name (without parameters). +- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name (without parameters). -- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag that indicates whether the table is temporary. +- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag that indicates whether the table is temporary. -- `data_paths` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Paths to the table data in the file systems. +- `data_paths` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Paths to the table data in the file systems. -- `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system. +- `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system.
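The flag columns of `system.table_engines` are plain UInt8 values, so they can be used directly as predicates; a minimal sketch:

```sql
-- Engines that support both TTL and replication.
SELECT name
FROM system.table_engines
WHERE supports_ttl AND supports_replication
ORDER BY name;
```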
-- `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) - Time of latest modification of the table metadata. +- `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) - Time of latest modification of the table metadata. -- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies. +- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database dependencies. -- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table). +- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table). -- `create_table_query` ([String](../../sql-reference/data-types/string.md)) - The query that was used to create the table. +- `create_table_query` ([String](../../sql-reference/data-types/string.md)) - The query that was used to create the table. -- `engine_full` ([String](../../sql-reference/data-types/string.md)) - Parameters of the table engine. +- `engine_full` ([String](../../sql-reference/data-types/string.md)) - Parameters of the table engine. -- `as_select` ([String](../../sql-reference/data-types/string.md)) - `SELECT` query for view. +- `as_select` ([String](../../sql-reference/data-types/string.md)) - `SELECT` query for view. -- `partition_key` ([String](../../sql-reference/data-types/string.md)) - The partition key expression specified in the table. +- `partition_key` ([String](../../sql-reference/data-types/string.md)) - The partition key expression specified in the table. -- `sorting_key` ([String](../../sql-reference/data-types/string.md)) - The sorting key expression specified in the table. +- `sorting_key` ([String](../../sql-reference/data-types/string.md)) - The sorting key expression specified in the table. -- `primary_key` ([String](../../sql-reference/data-types/string.md)) - The primary key expression specified in the table. +- `primary_key` ([String](../../sql-reference/data-types/string.md)) - The primary key expression specified in the table. -- `sampling_key` ([String](../../sql-reference/data-types/string.md)) - The sampling key expression specified in the table. +- `sampling_key` ([String](../../sql-reference/data-types/string.md)) - The sampling key expression specified in the table. 
-- `storage_policy` ([String](../../sql-reference/data-types/string.md)) - The storage policy: +- `storage_policy` ([String](../../sql-reference/data-types/string.md)) - The storage policy: - - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) - - [Distributed](../../engines/table-engines/special/distributed.md#distributed) + - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) + - [Distributed](../../engines/table-engines/special/distributed.md#distributed) -- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underying `Buffer` table). +- `total_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows, if it is possible to quickly determine exact number of rows in the table, otherwise `NULL` (including underlying `Buffer` table). -- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `NULL` (does not includes any underlying storage). +- `total_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes, if it is possible to quickly determine exact number of bytes for the table on storage, otherwise `NULL` (does not include any underlying storage). - - If the table stores data on disk, returns used space on disk (i.e. compressed). - - If the table stores data in memory, returns approximated number of used bytes in memory. + - If the table stores data on disk, returns used space on disk (i.e. compressed). + - If the table stores data in memory, returns approximate number of used bytes in memory. -- `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables). +- `lifetime_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of rows INSERTed since server start (only for `Buffer` tables). -- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes INSERTed since server start (only for `Buffer` tables). +- `lifetime_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) - Total number of bytes INSERTed since server start (only for `Buffer` tables). -- `comment` ([String](../../sql-reference/data-types/string.md)) - The comment for the table. +- `comment` ([String](../../sql-reference/data-types/string.md)) - The comment for the table. -- `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the table itself stores some data on disk or only accesses some other source. +- `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the table itself stores some data on disk or only accesses some other source.
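Since `total_rows` and `total_bytes` are `Nullable`, filtering out `NULL` gives a quick "largest tables" report from `system.tables`; a sketch:

```sql
-- Ten largest tables by on-disk size, where the engine can report it cheaply.
SELECT database, name, engine, total_rows, formatReadableSize(total_bytes) AS size_on_disk
FROM system.tables
WHERE total_bytes IS NOT NULL
ORDER BY total_bytes DESC
LIMIT 10;
```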
-- `loading_dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database loading dependencies (list of objects which should be loaded before the current object). +- `loading_dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database loading dependencies (list of objects which should be loaded before the current object). -- `loading_dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table loading dependencies (list of objects which should be loaded before the current object). +- `loading_dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table loading dependencies (list of objects which should be loaded before the current object). -- `loading_dependent_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading database. +- `loading_dependent_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading database. -- `loading_dependent_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading table. +- `loading_dependent_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading table. The `system.tables` table is used in `SHOW TABLES` query implementation. diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md index c0ddacc719c..897cefab0be 100644 --- a/docs/en/operations/system-tables/text_log.md +++ b/docs/en/operations/system-tables/text_log.md @@ -7,28 +7,28 @@ Contains logging entries. The logging level which goes to this table can be limi Columns: -- `event_date` (Date) — Date of the entry. -- `event_time` (DateTime) — Time of the entry. -- `event_time_microseconds` (DateTime) — Time of the entry with microseconds precision. -- `microseconds` (UInt32) — Microseconds of the entry. -- `thread_name` (String) — Name of the thread from which the logging was done. -- `thread_id` (UInt64) — OS thread ID. -- `level` (`Enum8`) — Entry level. Possible values: - - `1` or `'Fatal'`. - - `2` or `'Critical'`. - - `3` or `'Error'`. - - `4` or `'Warning'`. - - `5` or `'Notice'`. - - `6` or `'Information'`. - - `7` or `'Debug'`. - - `8` or `'Trace'`. -- `query_id` (String) — ID of the query. -- `logger_name` (LowCardinality(String)) — Name of the logger (i.e. `DDLWorker`). -- `message` (String) — The message itself. -- `revision` (UInt32) — ClickHouse revision. -- `source_file` (LowCardinality(String)) — Source file from which the logging was done. -- `source_line` (UInt64) — Source line from which the logging was done. -- `message_format_string` (LowCardinality(String)) — A format string that was used to format the message. +- `event_date` (Date) — Date of the entry. +- `event_time` (DateTime) — Time of the entry. +- `event_time_microseconds` (DateTime) — Time of the entry with microseconds precision. +- `microseconds` (UInt32) — Microseconds of the entry. +- `thread_name` (String) — Name of the thread from which the logging was done. +- `thread_id` (UInt64) — OS thread ID. +- `level` (`Enum8`) — Entry level. Possible values: + - `1` or `'Fatal'`. + - `2` or `'Critical'`. 
+ - `3` or `'Error'`. + - `4` or `'Warning'`. + - `5` or `'Notice'`. + - `6` or `'Information'`. + - `7` or `'Debug'`. + - `8` or `'Trace'`. +- `query_id` (String) — ID of the query. +- `logger_name` (LowCardinality(String)) — Name of the logger (e.g. `DDLWorker`). +- `message` (String) — The message itself. +- `revision` (UInt32) — ClickHouse revision. +- `source_file` (LowCardinality(String)) — Source file from which the logging was done. +- `source_line` (UInt64) — Source line from which the logging was done. +- `message_format_string` (LowCardinality(String)) — A format string that was used to format the message. **Example** diff --git a/docs/en/operations/system-tables/time_zones.md b/docs/en/operations/system-tables/time_zones.md index 498c9bfd217..906651b2960 100644 --- a/docs/en/operations/system-tables/time_zones.md +++ b/docs/en/operations/system-tables/time_zones.md @@ -7,7 +7,7 @@ Contains a list of time zones that are supported by the ClickHouse server. This Columns: -- `time_zone` (String) — List of supported time zones. +- `time_zone` (String) — List of supported time zones. **Example** diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index 4994248ce5c..a5aae422be7 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -12,38 +12,38 @@ To analyze logs, use the `addressToLine`, `addressToLineWithInlines`, `addressTo Columns: -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of sampling moment. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of sampling moment. -- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment. +- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Timestamp of the sampling moment with microseconds precision. +- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Timestamp of the sampling moment with microseconds precision. -- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds. +- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds. -- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision. +- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision. When connecting to the server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of a server. -- `trace_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Trace type: +- `trace_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Trace type: - - `Real` represents collecting stack traces by wall-clock time. - - `CPU` represents collecting stack traces by CPU time. - - `Memory` represents collecting allocations and deallocations when memory allocation exceeds the subsequent watermark. - - `MemorySample` represents collecting random allocations and deallocations. - - `MemoryPeak` represents collecting updates of peak memory usage. - - `ProfileEvent` represents collecting of increments of profile events.
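Because `level` is an `Enum8`, it can be compared against the level names listed above; a minimal sketch for `system.text_log` (the table exists only when `text_log` is enabled in the server configuration):

```sql
-- Most recent high-severity log entries.
SELECT event_time, level, logger_name, message
FROM system.text_log
WHERE level IN ('Fatal', 'Critical', 'Error')
ORDER BY event_time DESC
LIMIT 20;
```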
+ - `Real` represents collecting stack traces by wall-clock time. + - `CPU` represents collecting stack traces by CPU time. + - `Memory` represents collecting allocations and deallocations when memory allocation exceeds the subsequent watermark. + - `MemorySample` represents collecting random allocations and deallocations. + - `MemoryPeak` represents collecting updates of peak memory usage. + - `ProfileEvent` represents collecting increments of profile events. -- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Thread identifier. +- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Thread identifier. -- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](#system_tables-query_log) system table. +- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](#system_tables-query_log) system table. -- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process. +- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process. -- `size` ([Int64](../../sql-reference/data-types/int-uint.md)) - For trace types `Memory`, `MemorySample` or `MemoryPeak` is the amount of memory allocated, for other trace types is 0. +- `size` ([Int64](../../sql-reference/data-types/int-uint.md)) - For trace types `Memory`, `MemorySample` or `MemoryPeak`, it is the amount of memory allocated; for other trace types it is 0. -- `event` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) - For trace type `ProfileEvent` is the name of updated profile event, for other trace types is an empty string. +- `event` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) - For trace type `ProfileEvent`, it is the name of the updated profile event; for other trace types it is an empty string. -- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of incremnt of profile event, for other trace types is 0. +- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent`, it is the amount of the increment of the profile event; for other trace types it is 0. **Example** diff --git a/docs/en/operations/system-tables/users.md b/docs/en/operations/system-tables/users.md index 385e3151eb7..a90fa01a45d 100644 --- a/docs/en/operations/system-tables/users.md +++ b/docs/en/operations/system-tables/users.md @@ -6,30 +6,30 @@ slug: /en/operations/system-tables/users Contains a list of [user accounts](../../guides/sre/user-management/index.md#user-account-management) configured at the server. Columns: -- `name` ([String](../../sql-reference/data-types/string.md)) — User name. +- `name` ([String](../../sql-reference/data-types/string.md)) — User name. -- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — User ID. +- `id` ([UUID](../../sql-reference/data-types/uuid.md)) — User ID. -- `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of users. Configured in the `access_control_path` parameter. +- `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of users.
Configured in the `access_control_path` parameter. -- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0,'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6)) — Shows the authentication type. There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://ru.wikipedia.org/wiki/SHA-2)-encoded password or with [double SHA-1](https://ru.wikipedia.org/wiki/SHA-1)-encoded password. +- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0,'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6)) — Shows the authentication type. There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://ru.wikipedia.org/wiki/SHA-2)-encoded password or with [double SHA-1](https://ru.wikipedia.org/wiki/SHA-1)-encoded password. -- `auth_params` ([String](../../sql-reference/data-types/string.md)) — Authentication parameters in the JSON format depending on the `auth_type`. +- `auth_params` ([String](../../sql-reference/data-types/string.md)) — Authentication parameters in the JSON format depending on the `auth_type`. -- `host_ip` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — IP addresses of hosts that are allowed to connect to the ClickHouse server. +- `host_ip` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — IP addresses of hosts that are allowed to connect to the ClickHouse server. -- `host_names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Names of hosts that are allowed to connect to the ClickHouse server. +- `host_names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Names of hosts that are allowed to connect to the ClickHouse server. -- `host_names_regexp` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Regular expression for host names that are allowed to connect to the ClickHouse server. +- `host_names_regexp` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Regular expression for host names that are allowed to connect to the ClickHouse server. -- `host_names_like` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Names of hosts that are allowed to connect to the ClickHouse server, set using the LIKE predicate. +- `host_names_like` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Names of hosts that are allowed to connect to the ClickHouse server, set using the LIKE predicate. -- `default_roles_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows that all granted roles set for user by default. +- `default_roles_all` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether all granted roles are set for the user by default. -- `default_roles_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of granted roles provided by default.
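A quick way to audit the accounts described by `system.users`, using only columns documented above; a sketch:

```sql
-- Where each account is stored and how it authenticates.
SELECT name, storage, auth_type, default_roles_all
FROM system.users;
```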
+- `default_roles_list` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — List of granted roles provided by default. -- `default_roles_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — All the granted roles set as default excepting of the listed ones. +- `default_roles_except` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — All the granted roles set as default except the listed ones. ## See Also {#see-also} -- [SHOW USERS](../../sql-reference/statements/show.md#show-users-statement) +- [SHOW USERS](../../sql-reference/statements/show.md#show-users-statement) diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md index 503debf4302..7f948a238ac 100644 --- a/docs/en/operations/system-tables/zookeeper.md +++ b/docs/en/operations/system-tables/zookeeper.md @@ -16,20 +16,20 @@ It can be used to do a batch of Keeper path queries. Columns: -- `name` (String) — The name of the node. -- `path` (String) — The path to the node. -- `value` (String) — Node value. -- `dataLength` (Int32) — Size of the value. -- `numChildren` (Int32) — Number of descendants. -- `czxid` (Int64) — ID of the transaction that created the node. -- `mzxid` (Int64) — ID of the transaction that last changed the node. -- `pzxid` (Int64) — ID of the transaction that last deleted or added descendants. -- `ctime` (DateTime) — Time of node creation. -- `mtime` (DateTime) — Time of the last modification of the node. -- `version` (Int32) — Node version: the number of times the node was changed. -- `cversion` (Int32) — Number of added or removed descendants. -- `aversion` (Int32) — Number of changes to the ACL. -- `ephemeralOwner` (Int64) — For ephemeral nodes, the ID of the session that owns this node. +- `name` (String) — The name of the node. +- `path` (String) — The path to the node. +- `value` (String) — Node value. +- `dataLength` (Int32) — Size of the value. +- `numChildren` (Int32) — Number of descendants. +- `czxid` (Int64) — ID of the transaction that created the node. +- `mzxid` (Int64) — ID of the transaction that last changed the node. +- `pzxid` (Int64) — ID of the transaction that last deleted or added descendants. +- `ctime` (DateTime) — Time of node creation. +- `mtime` (DateTime) — Time of the last modification of the node. +- `version` (Int32) — Node version: the number of times the node was changed. +- `cversion` (Int32) — Number of added or removed descendants. +- `aversion` (Int32) — Number of changes to the ACL. +- `ephemeralOwner` (Int64) — For ephemeral nodes, the ID of the session that owns this node. Example: diff --git a/docs/en/operations/system-tables/zookeeper_log.md b/docs/en/operations/system-tables/zookeeper_log.md index 58c44325737..970ed192a48 100644 --- a/docs/en/operations/system-tables/zookeeper_log.md +++ b/docs/en/operations/system-tables/zookeeper_log.md @@ -9,46 +9,46 @@ For requests, only columns with request parameters are filled in, and the remain Columns with request parameters: -- `type` ([Enum](../../sql-reference/data-types/enum.md)) — Event type in the ZooKeeper client. Can have one of the following values: - - `Request` — The request has been sent. - - `Response` — The response was received. - - `Finalize` — The connection is lost, no response was received. -- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened.
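As the `system.zookeeper` description notes, it expects a `path` condition in `WHERE`; a minimal sketch (assumes ZooKeeper/Keeper is configured and the `/clickhouse` path exists):

```sql
-- Direct children of the /clickhouse node with their metadata.
SELECT name, value, numChildren, mtime
FROM system.zookeeper
WHERE path = '/clickhouse';
```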
-- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened. -- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address of ZooKeeper server that was used to make the request. -- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The port of ZooKeeper server that was used to make the request. -- `session_id` ([Int64](../../sql-reference/data-types/int-uint.md)) — The session ID that the ZooKeeper server sets for each connection. -- `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — The ID of the request within the session. This is usually a sequential request number. It is the same for the request row and the paired `response`/`finalize` row. -- `has_watch` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The request whether the [watch](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#ch_zkWatches) has been set. -- `op_num` ([Enum](../../sql-reference/data-types/enum.md)) — The type of request or response. -- `path` ([String](../../sql-reference/data-types/string.md)) — The path to the ZooKeeper node specified in the request, or an empty string if the request not requires specifying a path. -- `data` ([String](../../sql-reference/data-types/string.md)) — The data written to the ZooKeeper node (for the `SET` and `CREATE` requests — what the request wanted to write, for the response to the `GET` request — what was read) or an empty string. -- `is_ephemeral` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the ZooKeeper node being created as an [ephemeral](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Ephemeral+Nodes). -- `is_sequential` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the ZooKeeper node being created as an [sequential](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming). -- `version` ([Nullable(Int32)](../../sql-reference/data-types/nullable.md)) — The version of the ZooKeeper node that the request expects when executing. This is supported for `CHECK`, `SET`, `REMOVE` requests (is relevant `-1` if the request does not check the version or `NULL` for other requests that do not support version checking). -- `requests_size` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of requests included in the multi request (this is a special request that consists of several consecutive ordinary requests and executes them atomically). All requests included in multi request will have the same `xid`. -- `request_idx` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of the request included in multi request (for multi request — `0`, then in order from `1`). +- `type` ([Enum](../../sql-reference/data-types/enum.md)) — Event type in the ZooKeeper client. Can have one of the following values: + - `Request` — The request has been sent. + - `Response` — The response was received. + - `Finalize` — The connection is lost, no response was received. +- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened. +- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened. +- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address of ZooKeeper server that was used to make the request. +- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The port of ZooKeeper server that was used to make the request. 
+- `session_id` ([Int64](../../sql-reference/data-types/int-uint.md)) — The session ID that the ZooKeeper server sets for each connection. +- `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — The ID of the request within the session. This is usually a sequential request number. It is the same for the request row and the paired `response`/`finalize` row. +- `has_watch` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Whether the request has set the [watch](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#ch_zkWatches). +- `op_num` ([Enum](../../sql-reference/data-types/enum.md)) — The type of request or response. +- `path` ([String](../../sql-reference/data-types/string.md)) — The path to the ZooKeeper node specified in the request, or an empty string if the request does not require specifying a path. +- `data` ([String](../../sql-reference/data-types/string.md)) — The data written to the ZooKeeper node (for the `SET` and `CREATE` requests — what the request wanted to write, for the response to the `GET` request — what was read) or an empty string. +- `is_ephemeral` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Whether the ZooKeeper node is being created as an [ephemeral](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Ephemeral+Nodes) node. +- `is_sequential` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Whether the ZooKeeper node is being created as a [sequential](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming) node. +- `version` ([Nullable(Int32)](../../sql-reference/data-types/nullable.md)) — The version of the ZooKeeper node that the request expects when executing. This is supported for `CHECK`, `SET`, `REMOVE` requests (`-1` if the request does not check the version, or `NULL` for other requests that do not support version checking). +- `requests_size` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of requests included in the multi request (this is a special request that consists of several consecutive ordinary requests and executes them atomically). All requests included in the multi request will have the same `xid`. +- `request_idx` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of the request included in the multi request (`0` for the multi request itself, then in order from `1`). Columns with request response parameters: -- `zxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — ZooKeeper transaction ID. The serial number issued by the ZooKeeper server in response to a successfully executed request (`0` if the request was not executed/returned an error/the client does not know whether the request was executed). -- `error` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — Error code. Can have many values, here are just some of them: - - `ZOK` — The request was executed seccessfully. - - `ZCONNECTIONLOSS` — The connection was lost. - - `ZOPERATIONTIMEOUT` — The request execution timeout has expired. - - `ZSESSIONEXPIRED` — The session has expired. - - `NULL` — The request is completed. -- `watch_type` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — The type of the `watch` event (for responses with `op_num` = `Watch`), for the remaining responses: `NULL`. -- `watch_state` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — The status of the `watch` event (for responses with `op_num` = `Watch`), for the remaining responses: `NULL`.
-- `path_created` ([String](../../sql-reference/data-types/string.md)) — The path to the created ZooKeeper node (for responses to the `CREATE` request), may differ from the `path` if the node is created as a `sequential`. -- `stat_czxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that caused this ZooKeeper node to be created. -- `stat_mzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that last modified this ZooKeeper node. -- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified childern of this ZooKeeper node. -- `stat_version` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the data of this ZooKeeper node. -- `stat_cversion` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the children of this ZooKeeper node. -- `stat_dataLength` ([Int32](../../sql-reference/data-types/int-uint.md)) — The length of the data field of this ZooKeeper node. -- `stat_numChildren` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of children of this ZooKeeper node. -- `children` ([Array(String)](../../sql-reference/data-types/array.md)) — The list of child ZooKeeper nodes (for responses to `LIST` request). +- `zxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — ZooKeeper transaction ID. The serial number issued by the ZooKeeper server in response to a successfully executed request (`0` if the request was not executed/returned an error/the client does not know whether the request was executed). +- `error` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — Error code. Can have many values, here are just some of them: + - `ZOK` — The request was executed successfully. + - `ZCONNECTIONLOSS` — The connection was lost. + - `ZOPERATIONTIMEOUT` — The request execution timeout has expired. + - `ZSESSIONEXPIRED` — The session has expired. + - `NULL` — The request is completed. +- `watch_type` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — The type of the `watch` event (for responses with `op_num` = `Watch`), for the remaining responses: `NULL`. +- `watch_state` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — The status of the `watch` event (for responses with `op_num` = `Watch`), for the remaining responses: `NULL`. +- `path_created` ([String](../../sql-reference/data-types/string.md)) — The path to the created ZooKeeper node (for responses to the `CREATE` request), may differ from the `path` if the node is created as a `sequential`. +- `stat_czxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that caused this ZooKeeper node to be created. +- `stat_mzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that last modified this ZooKeeper node. +- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified children of this ZooKeeper node. +- `stat_version` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the data of this ZooKeeper node. +- `stat_cversion` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the children of this ZooKeeper node. +- `stat_dataLength` ([Int32](../../sql-reference/data-types/int-uint.md)) — The length of the data field of this ZooKeeper node.
+- `stat_numChildren` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of children of this ZooKeeper node. +- `children` ([Array(String)](../../sql-reference/data-types/array.md)) — The list of child ZooKeeper nodes (for responses to `LIST` request). **Example** @@ -128,5 +128,5 @@ children: ['query-0000000006','query-0000000005','query-0000000004','que **See Also** -- [ZooKeeper](../../operations/tips.md#zookeeper) -- [ZooKeeper guide](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html) +- [ZooKeeper](../../operations/tips.md#zookeeper) +- [ZooKeeper guide](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html) diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md index 0126cda160a..8620b44c368 100644 --- a/docs/en/operations/utilities/clickhouse-benchmark.md +++ b/docs/en/operations/utilities/clickhouse-benchmark.md @@ -41,24 +41,24 @@ clickhouse-benchmark [keys] < queries_file; ## Keys {#clickhouse-benchmark-keys} -- `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-benchmark` will read queries from standard input. -- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1. -- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set 0). Default value: 1. -- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys. -- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys. -- `-i N`, `--iterations=N` — Total number of queries. Default value: 0 (repeat forever). -- `-r`, `--randomize` — Random order of queries execution if there is more than one input query. -- `-s`, `--secure` — Using `TLS` connection. -- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled). -- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren’t different with the selected level of confidence. -- `--cumulative` — Printing cumulative data instead of data per interval. -- `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`. -- `--json=FILEPATH` — `JSON` output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON-file. -- `--user=USERNAME` — ClickHouse user name. Default value: `default`. -- `--password=PSWD` — ClickHouse user password. Default value: empty string. -- `--stacktrace` — Stack traces output. When the key is set, `clickhouse-bencmark` outputs stack traces of exceptions. -- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns an answer to `clickhouse-benchmark` at the specified stage. Possible values: `complete`, `fetch_columns`, `with_mergeable_state`. Default value: `complete`. -- `--help` — Shows the help message. +- `--query=QUERY` — Query to execute. 
If this parameter is not passed, `clickhouse-benchmark` will read queries from standard input. +- `-c N`, `--concurrency=N` — Number of queries that `clickhouse-benchmark` sends simultaneously. Default value: 1. +- `-d N`, `--delay=N` — Interval in seconds between intermediate reports (to disable reports set 0). Default value: 1. +- `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-h` keys. +- `-p N`, `--port=N` — Server port. Default value: 9000. For the [comparison mode](#clickhouse-benchmark-comparison-mode) you can use multiple `-p` keys. +- `-i N`, `--iterations=N` — Total number of queries. Default value: 0 (repeat forever). +- `-r`, `--randomize` — Random order of query execution if there is more than one input query. +- `-s`, `--secure` — Using `TLS` connection. +- `-t N`, `--timelimit=N` — Time limit in seconds. `clickhouse-benchmark` stops sending queries when the specified time limit is reached. Default value: 0 (time limit disabled). +- `--confidence=N` — Level of confidence for T-test. Possible values: 0 (80%), 1 (90%), 2 (95%), 3 (98%), 4 (99%), 5 (99.5%). Default value: 5. In the [comparison mode](#clickhouse-benchmark-comparison-mode) `clickhouse-benchmark` performs the [Independent two-sample Student’s t-test](https://en.wikipedia.org/wiki/Student%27s_t-test#Independent_two-sample_t-test) to determine whether the two distributions aren’t different with the selected level of confidence. +- `--cumulative` — Printing cumulative data instead of data per interval. +- `--database=DATABASE_NAME` — ClickHouse database name. Default value: `default`. +- `--json=FILEPATH` — `JSON` output. When the key is set, `clickhouse-benchmark` outputs a report to the specified JSON file. +- `--user=USERNAME` — ClickHouse user name. Default value: `default`. +- `--password=PSWD` — ClickHouse user password. Default value: empty string. +- `--stacktrace` — Stack traces output. When the key is set, `clickhouse-benchmark` outputs stack traces of exceptions. +- `--stage=WORD` — Query processing stage at server. ClickHouse stops query processing and returns an answer to `clickhouse-benchmark` at the specified stage. Possible values: `complete`, `fetch_columns`, `with_mergeable_state`. Default value: `complete`. +- `--help` — Shows the help message. If you want to apply some [settings](../../operations/settings/index.md) for queries, pass them as a key `--<session setting name>= SETTING_VALUE`. For example, `--max_memory_usage=1048576`. @@ -91,19 +91,19 @@ localhost:9000, queries 10, QPS: 6.772, RPS: 67904487.440, MiB/s: 518.070, resul In the report you can find: -- Number of queries in the `Queries executed:` field. +- Number of queries in the `Queries executed:` field. -- Status string containing (in order): +- Status string containing (in order): - - Endpoint of ClickHouse server. - - Number of processed queries. - - QPS: How many queries the server performed per second during a period specified in the `--delay` argument. - - RPS: How many rows the server reads per second during a period specified in the `--delay` argument. - - MiB/s: How many mebibytes the server reads per second during a period specified in the `--delay` argument. - - result RPS: How many rows placed by the server to the result of a query per second during a period specified in the `--delay` argument. - - result MiB/s. How many mebibytes placed by the server to the result of a query per second during a period specified in the `--delay` argument.
+ - Endpoint of ClickHouse server. + - Number of processed queries. + - QPS: How many queries the server performed per second during a period specified in the `--delay` argument. + - RPS: How many rows the server reads per second during a period specified in the `--delay` argument. + - MiB/s: How many mebibytes the server reads per second during a period specified in the `--delay` argument. + - result RPS: How many rows the server places in the result of a query per second during a period specified in the `--delay` argument. + - result MiB/s: How many mebibytes the server places in the result of a query per second during a period specified in the `--delay` argument. -- Percentiles of queries execution time. +- Percentiles of query execution time. ## Comparison Mode {#clickhouse-benchmark-comparison-mode} diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md index ccce7ea1b79..a9b82404b90 100644 --- a/docs/en/operations/utilities/clickhouse-copier.md +++ b/docs/en/operations/utilities/clickhouse-copier.md @@ -16,12 +16,12 @@ You can run multiple `clickhouse-copier` instances on different servers to perfo After starting, `clickhouse-copier`: -- Connects to ClickHouse Keeper and receives: +- Connects to ClickHouse Keeper and receives: - - Copying jobs. - - The state of the copying jobs. + - Copying jobs. + - The state of the copying jobs. -- It performs the jobs. +- It performs the jobs. Each running process chooses the “closest” shard of the source cluster and copies the data into the destination cluster, resharding the data if necessary. @@ -39,12 +39,12 @@ $ clickhouse-copier --daemon --config keeper.xml --task-path /task/path --base-d Parameters: -- `daemon` — Starts `clickhouse-copier` in daemon mode. -- `config` — The path to the `keeper.xml` file with the parameters for the connection to ClickHouse Keeper. -- `task-path` — The path to the ClickHouse Keeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`. -- `task-file` — Optional path to file with task configuration for initial upload to ClickHouse Keeper. -- `task-upload-force` — Force upload `task-file` even if node already exists. -- `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched. +- `daemon` — Starts `clickhouse-copier` in daemon mode. +- `config` — The path to the `keeper.xml` file with the parameters for the connection to ClickHouse Keeper. +- `task-path` — The path to the ClickHouse Keeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`. +- `task-file` — Optional path to a file with the task configuration for initial upload to ClickHouse Keeper. +- `task-upload-force` — Force upload `task-file` even if the node already exists. +- `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched.
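Putting the parameters above together: a minimal sketch of a launch that also uploads the task description on start. The Keeper node path, file names, and log directory below are hypothetical; `keeper.xml` holds the connection settings and `task.xml` the task configuration described above.

``` bash
# Upload task.xml to /clickhouse/copier/task1/description and start copying;
# logs and auxiliary files go to subdirectories of /var/log/clickhouse-copier.
$ clickhouse-copier --daemon \
    --config keeper.xml \
    --task-path /clickhouse/copier/task1 \
    --task-file task.xml \
    --base-dir /var/log/clickhouse-copier
```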
## Format of keeper.xml {#format-of-zookeeper-xml} diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md index a23e0745dec..f64d8337387 100644 --- a/docs/en/operations/utilities/clickhouse-local.md +++ b/docs/en/operations/utilities/clickhouse-local.md @@ -6,7 +6,17 @@ sidebar_label: clickhouse-local # clickhouse-local -The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. It accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/index.md). `clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines. +## Related Content + +- Blog: [Extracting, Converting, and Querying Data in Local Files using clickhouse-local](https://clickhouse.com/blog/extracting-converting-querying-local-files-with-sql-clickhouse-local) + +## When to use clickhouse-local vs. ClickHouse + +`clickhouse-local` is an easy-to-use version of ClickHouse that is ideal for developers who need to perform fast processing on local and remote files using SQL without having to install a full database server. With `clickhouse-local`, developers can use SQL commands (using the [ClickHouse SQL dialect](../../sql-reference/index.md)) directly from the command line, providing a simple and efficient way to access ClickHouse features without the need for a full ClickHouse installation. One of the main benefits of `clickhouse-local` is that it is already included when installing [clickhouse-client](https://clickhouse.com/docs/en/integrations/sql-clients/clickhouse-client-local). This means that developers can get started with `clickhouse-local` quickly, without the need for a complex installation process. + +While `clickhouse-local` is a great tool for development and testing purposes, and for processing files, it is not suitable for serving end users or applications. In these scenarios, it is recommended to use the open-source [ClickHouse](https://clickhouse.com/docs/en/install). ClickHouse is a powerful OLAP database that is designed to handle large-scale analytical workloads. It provides fast and efficient processing of complex queries on large datasets, making it ideal for use in production environments where high performance is critical. Additionally, ClickHouse offers a wide range of features such as replication, sharding, and high availability, which are essential for scaling up to handle large datasets and serving applications. If you need to handle larger datasets or serve end users or applications, we recommend using open-source ClickHouse instead of `clickhouse-local`. + +Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local CSVs](#query-data-in-a-csv-file-using-sql) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3). ## Download clickhouse-local @@ -170,25 +180,25 @@ $ ./clickhouse local --structure "table_structure" --input-format "format_of_inc Arguments: -- `-S`, `--structure` — table structure for input data. -- `--input-format` — input format, `TSV` by default. -- `-f`, `--file` — path to data, `stdin` by default. -- `-q`, `--query` — queries to execute with `;` as delimeter. You must specify either `query` or `queries-file` option. -- `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option.
-- `-N`, `--table` — table name where to put output data, `table` by default. -- `--format`, `--output-format` — output format, `TSV` by default. -- `-d`, `--database` — default database, `_local` by default. -- `--stacktrace` — whether to dump debug output in case of exception. -- `--echo` — print query before execution. -- `--verbose` — more details on query execution. -- `--logger.console` — Log to console. -- `--logger.log` — Log file name. -- `--logger.level` — Log level. -- `--ignore-error` — do not stop processing if a query failed. -- `-c`, `--config-file` — path to configuration file in same format as for ClickHouse server, by default the configuration empty. -- `--no-system-tables` — do not attach system tables. -- `--help` — arguments references for `clickhouse-local`. -- `-V`, `--version` — print version information and exit. +- `-S`, `--structure` — table structure for input data. +- `--input-format` — input format, `TSV` by default. +- `-f`, `--file` — path to data, `stdin` by default. +- `-q`, `--query` — queries to execute with `;` as delimiter. You must specify either `query` or `queries-file` option. +- `--queries-file` - file path with queries to execute. You must specify either `query` or `queries-file` option. +- `-N`, `--table` — table name where to put output data, `table` by default. +- `--format`, `--output-format` — output format, `TSV` by default. +- `-d`, `--database` — default database, `_local` by default. +- `--stacktrace` — whether to dump debug output in case of exception. +- `--echo` — print query before execution. +- `--verbose` — more details on query execution. +- `--logger.console` — Log to console. +- `--logger.log` — Log file name. +- `--logger.level` — Log level. +- `--ignore-error` — do not stop processing if a query failed. +- `-c`, `--config-file` — path to configuration file in the same format as for the ClickHouse server; by default the configuration is empty. +- `--no-system-tables` — do not attach system tables. +- `--help` — argument reference for `clickhouse-local`. +- `-V`, `--version` — print version information and exit. Also there are arguments for each ClickHouse configuration variable which are more commonly used instead of `--config-file`. diff --git a/docs/en/operations/utilities/index.md b/docs/en/operations/utilities/index.md index bf6b3a63d23..112a51cfa97 100644 --- a/docs/en/operations/utilities/index.md +++ b/docs/en/operations/utilities/index.md @@ -7,10 +7,10 @@ pagination_next: 'en/operations/utilities/clickhouse-copier' # List of tools and utilities -- [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this. -- [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. -- [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — Loads server with the custom queries and settings. -- [clickhouse-format](../../operations/utilities/clickhouse-format.md) — Enables formatting input queries. -- [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — Obfuscates data. -- [ClickHouse compressor](../../operations/utilities/clickhouse-compressor.md) — Compresses and decompresses data. -- [clickhouse-odbc-bridge](../../operations/utilities/odbc-bridge.md) — A proxy server for ODBC driver.
+- [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this. +- [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster. +- [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — Loads the server with custom queries and settings. +- [clickhouse-format](../../operations/utilities/clickhouse-format.md) — Enables formatting input queries. +- [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — Obfuscates data. +- [ClickHouse compressor](../../operations/utilities/clickhouse-compressor.md) — Compresses and decompresses data. +- [clickhouse-odbc-bridge](../../operations/utilities/odbc-bridge.md) — A proxy server for the ODBC driver. diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md index 704e88c6313..e1db5d8d23e 100644 --- a/docs/en/sql-reference/aggregate-functions/combinators.md +++ b/docs/en/sql-reference/aggregate-functions/combinators.md @@ -44,7 +44,7 @@ If you apply this combinator, the aggregate function returns the same value but **Arguments** -- `x` — Aggregate function parameters. +- `x` — Aggregate function parameters. **Returned values** @@ -72,11 +72,11 @@ If you apply this combinator, the aggregate function does not return the resulti To work with these states, use: -- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engine. -- [finalizeAggregation](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) function. -- [runningAccumulate](../../sql-reference/functions/other-functions.md#runningaccumulate) function. -- [-Merge](#aggregate_functions_combinators-merge) combinator. -- [-MergeState](#aggregate_functions_combinators-mergestate) combinator. +- [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engine. +- [finalizeAggregation](../../sql-reference/functions/other-functions.md#function-finalizeaggregation) function. +- [runningAccumulate](../../sql-reference/functions/other-functions.md#runningaccumulate) function. +- [-Merge](#aggregate_functions_combinators-merge) combinator. +- [-MergeState](#aggregate_functions_combinators-mergestate) combinator. ## -Merge @@ -111,7 +111,7 @@ If an aggregate function does not have input values, with this combinator it ret **Arguments** -- `x` — Aggregate function parameters. +- `x` — Aggregate function parameters. **Returned values** @@ -171,12 +171,12 @@ This combinator converts a result of an aggregate function to the [Nullable](../ **Arguments** -- `x` — Aggregate function parameters. +- `x` — Aggregate function parameters. **Returned values** -- The result of the aggregate function, converted to the `Nullable` data type. -- `NULL`, if there is nothing to aggregate. +- The result of the aggregate function, converted to the `Nullable` data type. +- `NULL`, if there is nothing to aggregate. Type: `Nullable(aggregate function return type)`. @@ -228,15 +228,15 @@ Lets you divide data into groups, and then separately aggregates the data in tho **Arguments** -- `start` — Starting value of the whole required interval for `resampling_key` values. -- `stop` — Ending value of the whole required interval for `resampling_key` values. The whole interval does not include the `stop` value `[start, stop)`.
-- `step` — Step for separating the whole interval into subintervals. The `aggFunction` is executed over each of those subintervals independently. -- `resampling_key` — Column whose values are used for separating data into intervals. -- `aggFunction_params` — `aggFunction` parameters. +- `start` — Starting value of the whole required interval for `resampling_key` values. +- `stop` — Ending value of the whole required interval for `resampling_key` values. The whole interval does not include the `stop` value `[start, stop)`. +- `step` — Step for separating the whole interval into subintervals. The `aggFunction` is executed over each of those subintervals independently. +- `resampling_key` — Column whose values are used for separating data into intervals. +- `aggFunction_params` — `aggFunction` parameters. **Returned values** -- Array of `aggFunction` results for each subinterval. +- Array of `aggFunction` results for each subinterval. **Example** @@ -285,3 +285,8 @@ FROM people │ [3,2] │ [11.5,12.949999809265137] │ └────────┴───────────────────────────┘ ``` + + +## Related Content + +- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states) diff --git a/docs/en/sql-reference/aggregate-functions/index.md b/docs/en/sql-reference/aggregate-functions/index.md index 56a55c6b7a0..8951ac4ee6a 100644 --- a/docs/en/sql-reference/aggregate-functions/index.md +++ b/docs/en/sql-reference/aggregate-functions/index.md @@ -10,8 +10,8 @@ Aggregate functions work in the [normal](http://www.sql-tutorial.com/sql-aggrega ClickHouse also supports: -- [Parametric aggregate functions](../../sql-reference/aggregate-functions/parametric-functions.md#aggregate_functions_parametric), which accept other parameters in addition to columns. -- [Combinators](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators), which change the behavior of aggregate functions. +- [Parametric aggregate functions](../../sql-reference/aggregate-functions/parametric-functions.md#aggregate_functions_parametric), which accept other parameters in addition to columns. +- [Combinators](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators), which change the behavior of aggregate functions. ## NULL Processing diff --git a/docs/en/sql-reference/aggregate-functions/parametric-functions.md b/docs/en/sql-reference/aggregate-functions/parametric-functions.md index 3b874dbe7cd..1b20f74d466 100644 --- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md +++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md @@ -28,7 +28,7 @@ The functions uses [A Streaming Parallel Decision Tree Algorithm](http://jmlr.or **Returned values** -- [Array](../../sql-reference/data-types/array.md) of [Tuples](../../sql-reference/data-types/tuple.md) of the following format: +- [Array](../../sql-reference/data-types/array.md) of [Tuples](../../sql-reference/data-types/tuple.md) of the following format: ``` [(lower_1, upper_1, height_1), ... (lower_N, upper_N, height_N)] @@ -96,29 +96,29 @@ Events that occur at the same second may lay in the sequence in an undefined ord **Arguments** -- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. +- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. 
You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. -- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. **Parameters** -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). **Returned values** -- 1, if the pattern is matched. -- 0, if the pattern isn’t matched. +- 1, if the pattern is matched. +- 0, if the pattern isn’t matched. Type: `UInt8`. **Pattern syntax** -- `(?N)` — Matches the condition argument at position `N`. Conditions are numbered in the `[1, 32]` range. For example, `(?1)` matches the argument passed to the `cond1` parameter. +- `(?N)` — Matches the condition argument at position `N`. Conditions are numbered in the `[1, 32]` range. For example, `(?1)` matches the argument passed to the `cond1` parameter. -- `.*` — Matches any number of events. You do not need conditional arguments to match this element of the pattern. +- `.*` — Matches any number of events. You do not need conditional arguments to match this element of the pattern. -- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of any events can lay between these events. You can use the `>=`, `>`, `<`, `<=`, `==` operators. +- `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of any events can lie between these events. You can use the `>=`, `>`, `<`, `<=`, `==` operators. **Examples** @@ -170,7 +170,7 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM **See Also** -- [sequenceCount](#function-sequencecount) +- [sequenceCount](#function-sequencecount) ## sequenceCount(pattern)(time, cond1, cond2, …) @@ -186,17 +186,17 @@ sequenceCount(pattern)(timestamp, cond1, cond2, ...) **Arguments** -- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. +- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types. -- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments.
The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. +- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them. **Parameters** -- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). +- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax). **Returned values** -- Number of non-overlapping event chains that are matched. +- Number of non-overlapping event chains that are matched. Type: `UInt64`. @@ -229,7 +229,7 @@ SELECT sequenceCount('(?1).*(?2)')(time, number = 1, number = 2) FROM t **See Also** -- [sequenceMatch](#function-sequencematch) +- [sequenceMatch](#function-sequencematch) ## windowFunnel @@ -237,11 +237,11 @@ Searches for event chains in a sliding time window and calculates the maximum nu The function works according to the algorithm: -- The function searches for data that triggers the first condition in the chain and sets the event counter to 1. This is the moment when the sliding window starts. +- The function searches for data that triggers the first condition in the chain and sets the event counter to 1. This is the moment when the sliding window starts. -- If events from the chain occur sequentially within the window, the counter is incremented. If the sequence of events is disrupted, the counter isn’t incremented. +- If events from the chain occur sequentially within the window, the counter is incremented. If the sequence of events is disrupted, the counter isn’t incremented. -- If the data has multiple event chains at varying points of completion, the function will only output the size of the longest chain. +- If the data has multiple event chains at varying points of completion, the function will only output the size of the longest chain. **Syntax** ``` sql windowFunnel(window, [mode, [mode, ... ]])(timestamp, cond1, cond2, ..., condN) ``` **Arguments** -- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, it’s value can’t exceed the Int64 maximum, which is 2^63 - 1). -- `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). +- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, its value can’t exceed the Int64 maximum, which is 2^63 - 1). +- `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). **Parameters** -- `window` — Length of the sliding window, it is the time interval between the first and the last condition. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond1 <= timestamp of cond2 <= ... <= timestamp of condN <= timestamp of cond1 + window`. -- `mode` — It is an optional argument. One or more modes can be set. - - `'strict_deduplication'` — If the same condition holds for the sequence of events, then such repeating event interrupts further processing. - - `'strict_order'` — Don't allow interventions of other events. E.g.
in the case of `A->B->D->C`, it stops finding `A->B->C` at the `D` and the max event level is 2. - - `'strict_increase'` — Apply conditions only to events with strictly increasing timestamps. +- `window` — Length of the sliding window; it is the time interval between the first and the last condition. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond1 <= timestamp of cond2 <= ... <= timestamp of condN <= timestamp of cond1 + window`. +- `mode` — An optional argument. One or more modes can be set. + - `'strict_deduplication'` — If the same condition holds for the sequence of events, then such repeating event interrupts further processing. + - `'strict_order'` — Don't allow interventions of other events. E.g. in the case of `A->B->D->C`, it stops finding `A->B->C` at the `D` and the max event level is 2. + - `'strict_increase'` — Apply conditions only to events with strictly increasing timestamps. **Returned value** @@ -341,14 +341,14 @@ retention(cond1, cond2, ..., cond32); **Arguments** -- `cond` — An expression that returns a `UInt8` result (1 or 0). +- `cond` — An expression that returns a `UInt8` result (1 or 0). **Returned value** The array of 1 or 0. -- 1 — Condition was met for the event. -- 0 — Condition wasn’t met for the event. +- 1 — Condition was met for the event. +- 0 — Condition wasn’t met for the event. Type: `UInt8`. @@ -481,9 +481,9 @@ Result: Where: -- `r1`- the number of unique visitors who visited the site during 2020-01-01 (the `cond1` condition). -- `r2`- the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-02 (`cond1` and `cond2` conditions). -- `r3`- the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-03 (`cond1` and `cond3` conditions). +- `r1` — the number of unique visitors who visited the site during 2020-01-01 (the `cond1` condition). +- `r2` — the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-02 (`cond1` and `cond2` conditions). +- `r3` — the number of unique visitors who visited the site during a specific time period between 2020-01-01 and 2020-01-03 (`cond1` and `cond3` conditions). ## uniqUpTo(N)(x) @@ -524,11 +524,11 @@ sequenceNextNode(direction, base)(timestamp, event_column, base_condition, event **Parameters** -- `direction` — Used to navigate to directions. +- `direction` — Used to set the direction of navigation. - forward — Moving forward. - backward — Moving backward. -- `base` — Used to set the base point. +- `base` — Used to set the base point. - head — Set the base point to the first event. - tail — Set the base point to the last event. - first_match — Set the base point to the first matched `event1`. @@ -536,15 +536,15 @@ sequenceNextNode(direction, base)(timestamp, event_column, base_condition, event **Arguments** -- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types. -- `event_column` — Name of the column containing the value of the next event to be returned. Data types supported: [String](../../sql-reference/data-types/string.md) and [Nullable(String)](../../sql-reference/data-types/nullable.md). -- `base_condition` — Condition that the base point must fulfill. -- `event1`, `event2`, ...
— Conditions describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). +- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types. +- `event_column` — Name of the column containing the value of the next event to be returned. Data types supported: [String](../../sql-reference/data-types/string.md) and [Nullable(String)](../../sql-reference/data-types/nullable.md). +- `base_condition` — Condition that the base point must fulfill. +- `event1`, `event2`, ... — Conditions describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md). **Returned values** -- `event_column[next_index]` — If the pattern is matched and next value exists. -- `NULL` - If the pattern isn’t matched or next value doesn't exist. +- `event_column[next_index]` — If the pattern is matched and the next value exists. +- `NULL` - If the pattern isn’t matched or the next value doesn't exist. Type: [Nullable(String)](../../sql-reference/data-types/nullable.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md index 88a56463de1..9fbc21910f8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md +++ b/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md @@ -13,7 +13,7 @@ anyHeavy(column) **Arguments** -- `column` – The column name. +- `column` – The column name. **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmax.md b/docs/en/sql-reference/aggregate-functions/reference/argmax.md index 7b99c831010..65c43ab04c0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmax.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmax.md @@ -15,12 +15,12 @@ argMax(arg, val) **Arguments** -- `arg` — Argument. -- `val` — Value. +- `arg` — Argument. +- `val` — Value. **Returned value** -- `arg` value that corresponds to maximum `val` value. +- `arg` value that corresponds to maximum `val` value. Type: matches `arg` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/argmin.md b/docs/en/sql-reference/aggregate-functions/reference/argmin.md index 945dda5e46d..a7c21e3f15b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/argmin.md +++ b/docs/en/sql-reference/aggregate-functions/reference/argmin.md @@ -15,12 +15,12 @@ argMin(arg, val) **Arguments** -- `arg` — Argument. -- `val` — Value. +- `arg` — Argument. +- `val` — Value. **Returned value** -- `arg` value that corresponds to minimum `val` value. +- `arg` value that corresponds to minimum `val` value. Type: matches `arg` type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/avg.md b/docs/en/sql-reference/aggregate-functions/reference/avg.md index dd37fe62b95..5463d8a1874 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avg.md @@ -15,12 +15,12 @@ avg(x) **Arguments** -- `x` — input values, must be [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md). +- `x` — input values, must be [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md).
**Returned value** -- The arithmetic mean, always as [Float64](../../../sql-reference/data-types/float.md). -- `NaN` if the input parameter `x` is empty. +- The arithmetic mean, always as [Float64](../../../sql-reference/data-types/float.md). +- `NaN` if the input parameter `x` is empty. **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md index 00dffdc33d2..99d3bac763d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md @@ -15,8 +15,8 @@ avgWeighted(x, weight) **Arguments** -- `x` — Values. -- `weight` — Weights of the values. +- `x` — Values. +- `weight` — Weights of the values. `x` and `weight` must both be [Integer](../../../sql-reference/data-types/int-uint.md), @@ -26,8 +26,8 @@ but may have different types. **Returned value** -- `NaN` if all the weights are equal to 0 or the supplied weights parameter is empty. -- Weighted mean otherwise. +- `NaN` if all the weights are equal to 0 or the supplied weights parameter is empty. +- Weighted mean otherwise. **Return type** is always [Float64](../../../sql-reference/data-types/float.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/contingency.md b/docs/en/sql-reference/aggregate-functions/reference/contingency.md index 9e89e99e66d..1b53ca1528f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/contingency.md +++ b/docs/en/sql-reference/aggregate-functions/reference/contingency.md @@ -16,11 +16,11 @@ contingency(column1, column2) **Arguments** -- `column1` and `column2` are the columns to be compared +- `column1` and `column2` are the columns to be compared **Returned value** -- a value between 0 to 1. The larger the result, the closer the association of the two columns. +- a value between 0 and 1. The larger the result, the closer the association of the two columns. **Return type** is always [Float64](../../../sql-reference/data-types/float.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/count.md b/docs/en/sql-reference/aggregate-functions/reference/count.md index 356f731ff16..a98c8e50174 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/count.md +++ b/docs/en/sql-reference/aggregate-functions/reference/count.md @@ -9,20 +9,20 @@ Counts the number of rows or not-NULL values. ClickHouse supports the following syntaxes for `count`: -- `count(expr)` or `COUNT(DISTINCT expr)`. -- `count()` or `COUNT(*)`. The `count()` syntax is ClickHouse-specific. +- `count(expr)` or `COUNT(DISTINCT expr)`. +- `count()` or `COUNT(*)`. The `count()` syntax is ClickHouse-specific. **Arguments** The function can take: -- Zero parameters. -- One [expression](../../../sql-reference/syntax.md#syntax-expressions). +- Zero parameters. +- One [expression](../../../sql-reference/syntax.md#syntax-expressions). **Returned value** -- If the function is called without parameters it counts the number of rows. -- If the [expression](../../../sql-reference/syntax.md#syntax-expressions) is passed, then the function counts how many times this expression returned not null. If the expression returns a [Nullable](../../../sql-reference/data-types/nullable.md)-type value, then the result of `count` stays not `Nullable`. The function returns 0 if the expression returned `NULL` for all the rows. +- If the function is called without parameters it counts the number of rows.
+- If the [expression](../../../sql-reference/syntax.md#syntax-expressions) is passed, then the function counts how many times this expression returned not null. If the expression returns a [Nullable](../../../sql-reference/data-types/nullable.md)-type value, then the result of `count` stays not `Nullable`. The function returns 0 if the expression returned `NULL` for all the rows. In both cases the type of the returned value is [UInt64](../../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md index fa37e3b5781..f412724ea08 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md @@ -15,11 +15,11 @@ cramersV(column1, column2) **Arguments** -- `column1` and `column2` are the columns to be compared +- `column1` and `column2` are the columns to be compared **Returned value** -- a value between 0 (corresponding to no association between the columns' values) to 1 (complete association). +- a value between 0 (corresponding to no association between the columns' values) and 1 (complete association). **Return type** is always [Float64](../../../sql-reference/data-types/float.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md index 651b5e7b5a2..8e577efbc4d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md @@ -18,11 +18,11 @@ cramersVBiasCorrected(column1, column2) **Arguments** -- `column1` and `column2` are the columns to be compared +- `column1` and `column2` are the columns to be compared **Returned value** -- a value between 0 (corresponding to no association between the columns' values) to 1 (complete association). +- a value between 0 (corresponding to no association between the columns' values) and 1 (complete association). **Return type** is always [Float64](../../../sql-reference/data-types/float.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md index d5d9e9369a4..37d9d08cbdb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/deltasum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasum.md @@ -19,7 +19,7 @@ deltaSum(value) **Arguments** -- `value` — Input values, must be [Integer](../../data-types/int-uint.md) or [Float](../../data-types/float.md) type. +- `value` — Input values, must be [Integer](../../data-types/int-uint.md) or [Float](../../data-types/float.md) type. **Returned value** @@ -71,4 +71,4 @@ Result: ## See Also -- [runningDifference](../../functions/other-functions.md#other_functions-runningdifference) +- [runningDifference](../../functions/other-functions.md#other_functions-runningdifference) diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md index e08e69b7cf6..c51d86389b0 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md @@ -6,7 +6,7 @@ title: deltaSumTimestamp Adds the difference between consecutive rows. If the difference is negative, it is ignored.
-This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that are ordered by some time bucket-aligned timestamp, for example, a `toStartOfMinute` bucket. Because the rows in such a materialized view will all have the same timestamp, it is impossible for them to be merged in the "right" order. This function keeps track of the `timestamp` of the values it's seen, so it's possible to order the states correctly during merging. +This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that store data ordered by some time bucket-aligned timestamp, for example, a `toStartOfMinute` bucket. Because the rows in such a materialized view will all have the same timestamp, it is impossible for them to be merged in the correct order without storing the original, unrounded timestamp value. The `deltaSumTimestamp` function keeps track of the original `timestamp` of the values it's seen, so the values (states) of the function are correctly computed during merging of parts. To calculate the delta sum across an ordered collection you can simply use the [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) function. @@ -18,12 +18,12 @@ deltaSumTimestamp(value, timestamp) **Arguments** -- `value` — Input values, must be some [Integer](../../data-types/int-uint.md) type or [Float](../../data-types/float.md) type or a [Date](../../data-types/date.md) or [DateTime](../../data-types/datetime.md). -- `timestamp` — The parameter for order values, must be some [Integer](../../data-types/int-uint.md) type or [Float](../../data-types/float.md) type or a [Date](../../data-types/date.md) or [DateTime](../../data-types/datetime.md). +- `value` — Input values, must be some [Integer](../../data-types/int-uint.md) type or [Float](../../data-types/float.md) type or a [Date](../../data-types/date.md) or [DateTime](../../data-types/datetime.md). +- `timestamp` — The parameter for order values, must be some [Integer](../../data-types/int-uint.md) type or [Float](../../data-types/float.md) type or a [Date](../../data-types/date.md) or [DateTime](../../data-types/datetime.md). **Returned value** -- Accumulated differences between consecutive values, ordered by the `timestamp` parameter. +- Accumulated differences between consecutive values, ordered by the `timestamp` parameter. Type: [Integer](../../data-types/int-uint.md) or [Float](../../data-types/float.md) or [Date](../../data-types/date.md) or [DateTime](../../data-types/datetime.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/entropy.md b/docs/en/sql-reference/aggregate-functions/reference/entropy.md index d86f4f4197a..fc8d627ecab 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/entropy.md +++ b/docs/en/sql-reference/aggregate-functions/reference/entropy.md @@ -15,11 +15,11 @@ entropy(val) **Arguments** -- `val` — Column of values of any type. +- `val` — Column of values of any type. **Returned value** -- Shannon entropy. +- Shannon entropy. Type: [Float64](../../../sql-reference/data-types/float.md).
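For intuition about the returned value, a minimal sketch using the `numbers` table function: a uniformly distributed column yields higher Shannon entropy than a constant one (assuming the base-2 convention, 8 equiprobable values give 3 bits and a constant gives 0).

``` sql
SELECT
    entropy(number % 8) AS uniform,  -- 8 equiprobable values
    entropy(0) AS constant           -- a single repeated value
FROM numbers(1000);
```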
diff --git a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md index 5d82d3575fc..a8203c6b3f4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md +++ b/docs/en/sql-reference/aggregate-functions/reference/exponentialmovingaverage.md @@ -18,12 +18,12 @@ Each `value` corresponds to the determinate `timeunit`. The half-life `x` is the **Arguments** -- `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). Timeunit is not timestamp (seconds), it's -- an index of the time interval. Can be calculated using [intDiv](../../functions/arithmetic-functions.md#intdiva-b). +- `value` — Value. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `timeunit` — Timeunit. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). Timeunit is not a timestamp (seconds) but an index of the time interval. It can be calculated using [intDiv](../../functions/arithmetic-functions.md#intdiva-b). **Parameters** -- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `x` — Half-life period. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). **Returned values** diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md index aafa643a972..d745e8a0e7a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md @@ -15,19 +15,19 @@ groupArrayInsertAt(default_x, size)(x, pos) If in one query several values are inserted into the same position, the function behaves in the following ways: -- If a query is executed in a single thread, the first one of the inserted values is used. -- If a query is executed in multiple threads, the resulting value is an undetermined one of the inserted values. +- If a query is executed in a single thread, the first one of the inserted values is used. +- If a query is executed in multiple threads, the resulting value is an undetermined one of the inserted values. **Arguments** -- `x` — Value to be inserted. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../../sql-reference/data-types/index.md). -- `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). -- `default_x` — Default value for substituting in empty positions. Optional parameter.
[Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in the data type configured for the `x` parameter. If `default_x` is not defined, the [default values](../../../sql-reference/statements/create/table.md#create-default-values) are used. -- `size` — Length of the resulting array. Optional parameter. When using this parameter, the default value `default_x` must be specified. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). +- `x` — Value to be inserted. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../../sql-reference/data-types/index.md). +- `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). +- `default_x` — Default value for substituting in empty positions. Optional parameter. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in the data type configured for the `x` parameter. If `default_x` is not defined, the [default values](../../../sql-reference/statements/create/table.md#create-default-values) are used. +- `size` — Length of the resulting array. Optional parameter. When using this parameter, the default value `default_x` must be specified. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges). **Returned value** -- Array with inserted values. +- Array with inserted values. Type: [Array](../../../sql-reference/data-types/array.md#data-type-array). diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md index 8fa1939e7d3..32c0608afeb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md @@ -16,12 +16,12 @@ The function can take the window size as a parameter. If left unspecified, the f **Arguments** -- `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. -- `window_size` — Size of the calculation window. +- `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. +- `window_size` — Size of the calculation window. **Returned values** -- Array of the same size and type as the input data. +- Array of the same size and type as the input data. The function uses [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero). It truncates the decimal places insignificant for the resulting data type. diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md index a51857418c6..6f2a60dd080 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md @@ -16,12 +16,12 @@ The function can take the window size as a parameter. If left unspecified, the f **Arguments** -- `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. -- `window_size` — Size of the calculation window. +- `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value. 
+- `window_size` — Size of the calculation window. **Returned values** -- Array of the same size and type as the input data. +- Array of the same size and type as the input data. **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md index 26c41c6636b..393087161df 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md +++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraysample.md @@ -15,13 +15,13 @@ groupArraySample(max_size[, seed])(x) **Arguments** -- `max_size` — Maximum size of the resulting array. [UInt64](../../data-types/int-uint.md). -- `seed` — Seed for the random number generator. Optional. [UInt64](../../data-types/int-uint.md). Default value: `123456`. -- `x` — Argument (column name or expression). +- `max_size` — Maximum size of the resulting array. [UInt64](../../data-types/int-uint.md). +- `seed` — Seed for the random number generator. Optional. [UInt64](../../data-types/int-uint.md). Default value: `123456`. +- `x` — Argument (column name or expression). **Returned values** -- Array of randomly selected `x` arguments. +- Array of randomly selected `x` arguments. Type: [Array](../../data-types/array.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/index.md b/docs/en/sql-reference/aggregate-functions/reference/index.md index bd8e72e0fec..e25e3a54356 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/index.md +++ b/docs/en/sql-reference/aggregate-functions/reference/index.md @@ -9,75 +9,75 @@ toc_hidden: true Standard aggregate functions: -- [count](../../../sql-reference/aggregate-functions/reference/count.md) -- [min](../../../sql-reference/aggregate-functions/reference/min.md) -- [max](../../../sql-reference/aggregate-functions/reference/max.md) -- [sum](../../../sql-reference/aggregate-functions/reference/sum.md) -- [avg](../../../sql-reference/aggregate-functions/reference/avg.md) -- [any](../../../sql-reference/aggregate-functions/reference/any.md) -- [stddevPop](../../../sql-reference/aggregate-functions/reference/stddevpop.md) -- [stddevSamp](../../../sql-reference/aggregate-functions/reference/stddevsamp.md) -- [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md) -- [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md) -- [covarPop](../../../sql-reference/aggregate-functions/reference/covarpop.md) -- [covarSamp](../../../sql-reference/aggregate-functions/reference/covarsamp.md) +- [count](../../../sql-reference/aggregate-functions/reference/count.md) +- [min](../../../sql-reference/aggregate-functions/reference/min.md) +- [max](../../../sql-reference/aggregate-functions/reference/max.md) +- [sum](../../../sql-reference/aggregate-functions/reference/sum.md) +- [avg](../../../sql-reference/aggregate-functions/reference/avg.md) +- [any](../../../sql-reference/aggregate-functions/reference/any.md) +- [stddevPop](../../../sql-reference/aggregate-functions/reference/stddevpop.md) +- [stddevSamp](../../../sql-reference/aggregate-functions/reference/stddevsamp.md) +- [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md) +- [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md) +- [covarPop](../../../sql-reference/aggregate-functions/reference/covarpop.md) +- [covarSamp](../../../sql-reference/aggregate-functions/reference/covarsamp.md) ClickHouse-specific aggregate functions: -- 
[anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md) -- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md) -- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md) -- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md) -- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md) -- [topK](../../../sql-reference/aggregate-functions/reference/topk.md) -- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md) -- [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md) -- [groupArrayLast](../../../sql-reference/aggregate-functions/reference/grouparraylast.md) -- [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md) -- [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md) -- [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md) -- [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md) -- [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md) -- [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md) -- [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md) -- [groupBitmap](../../../sql-reference/aggregate-functions/reference/groupbitmap.md) -- [groupBitmapAnd](../../../sql-reference/aggregate-functions/reference/groupbitmapand.md) -- [groupBitmapOr](../../../sql-reference/aggregate-functions/reference/groupbitmapor.md) -- [groupBitmapXor](../../../sql-reference/aggregate-functions/reference/groupbitmapxor.md) -- [sumWithOverflow](../../../sql-reference/aggregate-functions/reference/sumwithoverflow.md) -- [sumMap](../../../sql-reference/aggregate-functions/reference/summap.md) -- [minMap](../../../sql-reference/aggregate-functions/reference/minmap.md) -- [maxMap](../../../sql-reference/aggregate-functions/reference/maxmap.md) -- [skewSamp](../../../sql-reference/aggregate-functions/reference/skewsamp.md) -- [skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md) -- [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md) -- [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md) -- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md) -- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md) -- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) -- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md) -- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md) -- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md) -- [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md) -- [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md) -- [quantileExactLow](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow) -- [quantileExactHigh](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh) -- [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md) -- [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md) 
-- [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md) -- [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md) -- [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) -- [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md) -- [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16) -- [quantileBFloat16Weighted](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted) -- [simpleLinearRegression](../../../sql-reference/aggregate-functions/reference/simplelinearregression.md) -- [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md) -- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md) -- [categoricalInformationValue](../../../sql-reference/aggregate-functions/reference/categoricalinformationvalue.md) -- [contingency](./contingency.md) -- [cramersV](./cramersv.md) -- [cramersVBiasCorrected](./cramersvbiascorrected.md) -- [theilsU](./theilsu.md) -- [maxIntersections](./maxintersections.md) -- [maxIntersectionsPosition](./maxintersectionsposition.md) +- [anyHeavy](../../../sql-reference/aggregate-functions/reference/anyheavy.md) +- [anyLast](../../../sql-reference/aggregate-functions/reference/anylast.md) +- [argMin](../../../sql-reference/aggregate-functions/reference/argmin.md) +- [argMax](../../../sql-reference/aggregate-functions/reference/argmax.md) +- [avgWeighted](../../../sql-reference/aggregate-functions/reference/avgweighted.md) +- [topK](../../../sql-reference/aggregate-functions/reference/topk.md) +- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md) +- [groupArray](../../../sql-reference/aggregate-functions/reference/grouparray.md) +- [groupArrayLast](../../../sql-reference/aggregate-functions/reference/grouparraylast.md) +- [groupUniqArray](../../../sql-reference/aggregate-functions/reference/groupuniqarray.md) +- [groupArrayInsertAt](../../../sql-reference/aggregate-functions/reference/grouparrayinsertat.md) +- [groupArrayMovingAvg](../../../sql-reference/aggregate-functions/reference/grouparraymovingavg.md) +- [groupArrayMovingSum](../../../sql-reference/aggregate-functions/reference/grouparraymovingsum.md) +- [groupBitAnd](../../../sql-reference/aggregate-functions/reference/groupbitand.md) +- [groupBitOr](../../../sql-reference/aggregate-functions/reference/groupbitor.md) +- [groupBitXor](../../../sql-reference/aggregate-functions/reference/groupbitxor.md) +- [groupBitmap](../../../sql-reference/aggregate-functions/reference/groupbitmap.md) +- [groupBitmapAnd](../../../sql-reference/aggregate-functions/reference/groupbitmapand.md) +- [groupBitmapOr](../../../sql-reference/aggregate-functions/reference/groupbitmapor.md) +- [groupBitmapXor](../../../sql-reference/aggregate-functions/reference/groupbitmapxor.md) +- [sumWithOverflow](../../../sql-reference/aggregate-functions/reference/sumwithoverflow.md) +- [sumMap](../../../sql-reference/aggregate-functions/reference/summap.md) +- [minMap](../../../sql-reference/aggregate-functions/reference/minmap.md) +- [maxMap](../../../sql-reference/aggregate-functions/reference/maxmap.md) +- [skewSamp](../../../sql-reference/aggregate-functions/reference/skewsamp.md) +- 
[skewPop](../../../sql-reference/aggregate-functions/reference/skewpop.md) +- [kurtSamp](../../../sql-reference/aggregate-functions/reference/kurtsamp.md) +- [kurtPop](../../../sql-reference/aggregate-functions/reference/kurtpop.md) +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) +- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md) +- [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md) +- [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md) +- [quantileExactLow](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow) +- [quantileExactHigh](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh) +- [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md) +- [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md) +- [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md) +- [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md) +- [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) +- [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md) +- [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16) +- [quantileBFloat16Weighted](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted) +- [simpleLinearRegression](../../../sql-reference/aggregate-functions/reference/simplelinearregression.md) +- [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md) +- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md) +- [categoricalInformationValue](../../../sql-reference/aggregate-functions/reference/categoricalinformationvalue.md) +- [contingency](./contingency.md) +- [cramersV](./cramersv.md) +- [cramersVBiasCorrected](./cramersvbiascorrected.md) +- [theilsU](./theilsu.md) +- [maxIntersections](./maxintersections.md) +- [maxIntersectionsPosition](./maxintersectionsposition.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md b/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md index e161a8c5754..5990345b765 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/intervalLengthSum.md @@ -15,8 +15,8 @@ intervalLengthSum(start, end) **Arguments** -- `start` — The starting value of the interval. 
[Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) or [Date](../../../sql-reference/data-types/date.md#data_type-date). -- `end` — The ending value of the interval. [Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) or [Date](../../../sql-reference/data-types/date.md#data_type-date). +- `start` — The starting value of the interval. [Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) or [Date](../../../sql-reference/data-types/date.md#data_type-date). +- `end` — The ending value of the interval. [Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) or [Date](../../../sql-reference/data-types/date.md#data_type-date). :::note Arguments must be of the same data type. Otherwise, an exception will be thrown. @@ -24,7 +24,7 @@ Arguments must be of the same data type. Otherwise, an exception will be thrown. **Returned value** -- Total length of union of all ranges (segments on numeric axis). Depending on the type of the argument, the return value may be [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64) type. 
+- Total length of the union of all ranges (segments on the numeric axis). Depending on the type of the argument, the return value may be of [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64) type. **Examples** diff --git a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md index 70c75b898c6..af744f445d9 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/mannwhitneyutest.md @@ -19,23 +19,23 @@ The null hypothesis is that two populations are stochastically equal. Also one-s **Arguments** -- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). +- `sample_data` — sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — sample index. [Integer](../../../sql-reference/data-types/int-uint.md). **Parameters** -- `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). - - `'two-sided'`; - - `'greater'`; - - `'less'`. -- `continuity_correction` — if not 0 then continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md). +- `alternative` — alternative hypothesis. (Optional, default: `'two-sided'`.) [String](../../../sql-reference/data-types/string.md). + - `'two-sided'`; + - `'greater'`; + - `'less'`. +- `continuity_correction` — if not 0 then continuity correction in the normal approximation for the p-value is applied. (Optional, default: 1.) [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** [Tuple](../../../sql-reference/data-types/tuple.md) with two elements: -- calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md). -- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). +- calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -69,5 +69,5 @@ Result: **See Also** -- [Mann–Whitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test) -- [Stochastic ordering](https://en.wikipedia.org/wiki/Stochastic_ordering) +- [Mann–Whitney U test](https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test) +- [Stochastic ordering](https://en.wikipedia.org/wiki/Stochastic_ordering) diff --git a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md index 89742ca1509..1cf2bebf26f 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md @@ -19,23 +19,23 @@ The null hypothesis is that means of populations are equal. Normal distribution **Arguments** -- `sample_data` — Sample data. 
[Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). +- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). **Parameters** -- `population_variance_x` — Variance for population x. [Float](../../../sql-reference/data-types/float.md). -- `population_variance_y` — Variance for population y. [Float](../../../sql-reference/data-types/float.md). -- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). +- `population_variance_x` — Variance for population x. [Float](../../../sql-reference/data-types/float.md). +- `population_variance_y` — Variance for population y. [Float](../../../sql-reference/data-types/float.md). +- `confidence_level` — Confidence level used to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). **Returned values** [Tuple](../../../sql-reference/data-types/tuple.md) with four elements: -- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). -- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). -- calculated confidence-interval-low. [Float64](../../../sql-reference/data-types/float.md). -- calculated confidence-interval-high. [Float64](../../../sql-reference/data-types/float.md). +- calculated z-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). +- calculated confidence-interval-low. [Float64](../../../sql-reference/data-types/float.md). +- calculated confidence-interval-high. [Float64](../../../sql-reference/data-types/float.md). **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/median.md b/docs/en/sql-reference/aggregate-functions/reference/median.md index 5ac3c6ef721..f20b23a0c8b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/median.md +++ b/docs/en/sql-reference/aggregate-functions/reference/median.md @@ -9,15 +9,15 @@ The `median*` functions are the aliases for the corresponding `quantile*` functi Functions: -- `median` — Alias for [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile). -- `medianDeterministic` — Alias for [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md#quantiledeterministic). -- `medianExact` — Alias for [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact). -- `medianExactWeighted` — Alias for [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md#quantileexactweighted). -- `medianTiming` — Alias for [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming). -- `medianTimingWeighted` — Alias for [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md#quantiletimingweighted). -- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest). 
-- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted). -- `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16). +- `median` — Alias for [quantile](../../../sql-reference/aggregate-functions/reference/quantile.md#quantile). +- `medianDeterministic` — Alias for [quantileDeterministic](../../../sql-reference/aggregate-functions/reference/quantiledeterministic.md#quantiledeterministic). +- `medianExact` — Alias for [quantileExact](../../../sql-reference/aggregate-functions/reference/quantileexact.md#quantileexact). +- `medianExactWeighted` — Alias for [quantileExactWeighted](../../../sql-reference/aggregate-functions/reference/quantileexactweighted.md#quantileexactweighted). +- `medianTiming` — Alias for [quantileTiming](../../../sql-reference/aggregate-functions/reference/quantiletiming.md#quantiletiming). +- `medianTimingWeighted` — Alias for [quantileTimingWeighted](../../../sql-reference/aggregate-functions/reference/quantiletimingweighted.md#quantiletimingweighted). +- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest). +- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted). +- `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16). **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantile.md b/docs/en/sql-reference/aggregate-functions/reference/quantile.md index 414574e00e6..91b6b1b0d80 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantile.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantile.md @@ -23,18 +23,18 @@ Alias: `median`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** -- Approximate quantile of the specified level. +- Approximate quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. 
+- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -65,5 +65,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileApprox.md b/docs/en/sql-reference/aggregate-functions/reference/quantileApprox.md deleted file mode 100644 index 21b9a3500c4..00000000000 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileApprox.md +++ /dev/null @@ -1,76 +0,0 @@ ---- -slug: /en/sql-reference/aggregate-functions/reference/quantileApprox -sidebar_position: 204 ---- - -# quantileApprox - -Computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [Greenwald-Khanna](http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf) algorithm. The Greenwald-Khanna algorithm is an algorithm used to compute quantiles on a stream of data in a highly efficient manner. It was introduced by Michael Greenwald and Sanjeev Khanna in 2001. It is widely used in databases and big data systems where computing accurate quantiles on a large stream of data in real-time is necessary. The algorithm is highly efficient, taking only O(log n) space and O(log log n) time per item (where n is the size of the input). It is also highly accurate, providing an approximate quantile value with high probability. - -`quantileApprox` is different from other quantile functions in ClickHouse, because it enables user to control the accuracy of the approximate quantile result. - -**Syntax** - -``` sql -quantileApprox(accuracy, level)(expr) -``` - -Alias: `medianApprox`. - -**Arguments** - -- `accuracy` — Accuracy of quantile. Constant positive integer. Larger accuracy value means less error. For example, if the accuracy argument is set to 100, the computed quantile will have an error no greater than 1% with high probability. There is a trade-off between the accuracy of the computed quantiles and the computational complexity of the algorithm. A larger accuracy requires more memory and computational resources to compute the quantile accurately, while a smaller accuracy argument allows for a faster and more memory-efficient computation but with a slightly lower accuracy. - -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). - -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). - - -**Returned value** - -- Quantile of the specified level and accuracy. - - -Type: - -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. 
- -**Example** - -``` sql -SELECT quantileApprox(1, 0.25)(number + 1) -FROM numbers(1000) - -┌─quantileApprox(1, 0.25)(plus(number, 1))─┐ -│ 1 │ -└──────────────────────────────────────────┘ - -SELECT quantileApprox(10, 0.25)(number + 1) -FROM numbers(1000) - -┌─quantileApprox(10, 0.25)(plus(number, 1))─┐ -│ 156 │ -└───────────────────────────────────────────┘ - -SELECT quantileApprox(100, 0.25)(number + 1) -FROM numbers(1000) - -┌─quantileApprox(100, 0.25)(plus(number, 1))─┐ -│ 251 │ -└────────────────────────────────────────────┘ - -SELECT quantileApprox(1000, 0.25)(number + 1) -FROM numbers(1000) - -┌─quantileApprox(1000, 0.25)(plus(number, 1))─┐ -│ 249 │ -└─────────────────────────────────────────────┘ -``` - - -**See Also** - -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md b/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md new file mode 100644 index 00000000000..7352781d126 --- /dev/null +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileGK.md @@ -0,0 +1,76 @@ +--- +slug: /en/sql-reference/aggregate-functions/reference/quantileGK +sidebar_position: 204 +--- + +# quantileGK + +Computes the [quantile](https://en.wikipedia.org/wiki/Quantile) of a numeric data sequence using the [Greenwald-Khanna](http://infolab.stanford.edu/~datar/courses/cs361a/papers/quantiles.pdf) algorithm. The Greenwald-Khanna algorithm computes quantiles on a stream of data in a highly efficient manner. It was introduced by Michael Greenwald and Sanjeev Khanna in 2001. It is widely used in databases and big data systems where computing accurate quantiles on a large stream of data in real time is necessary. The algorithm is highly efficient, taking only O(log n) space and O(log log n) time per item (where n is the size of the input). It is also highly accurate, providing an approximate quantile value with high probability. + +`quantileGK` is different from other quantile functions in ClickHouse, because it enables the user to control the accuracy of the approximate quantile result. + +**Syntax** + +``` sql +quantileGK(accuracy, level)(expr) +``` + +Alias: `medianGK`. + +**Arguments** + +- `accuracy` — Accuracy of quantile. Constant positive integer. A larger accuracy value means less error. For example, if the accuracy argument is set to 100, the computed quantile will have an error no greater than 1% with high probability. There is a trade-off between the accuracy of the computed quantiles and the computational complexity of the algorithm. A larger accuracy requires more memory and computational resources to compute the quantile accurately, while a smaller accuracy argument allows for a faster and more memory-efficient computation but with a slightly lower accuracy. + +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). + +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). + + +**Returned value** + +- Quantile of the specified level and accuracy. 
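+
+A practical way to read `accuracy` is as the inverse of the rank-error bound: with `accuracy = 100` over 1000 values, the returned element should lie within roughly 1000/100 = 10 rank positions of the exact quantile. The following query is only an illustrative sketch of how one might eyeball this, not a normative example:
+
+``` sql
+-- compare the Greenwald-Khanna estimate with the exact median on the same data;
+-- with accuracy 100 the two results should differ by only a few ranks
+SELECT
+    quantileGK(100, 0.5)(number + 1) AS approximate,
+    quantileExact(0.5)(number + 1) AS exact
+FROM numbers(1000);
+```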
+ + +Type: + +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. + +**Example** + +``` sql +SELECT quantileGK(1, 0.25)(number + 1) +FROM numbers(1000) + +┌─quantileGK(1, 0.25)(plus(number, 1))─┐ +│ 1 │ +└──────────────────────────────────────┘ + +SELECT quantileGK(10, 0.25)(number + 1) +FROM numbers(1000) + +┌─quantileGK(10, 0.25)(plus(number, 1))─┐ +│ 156 │ +└───────────────────────────────────────┘ + +SELECT quantileGK(100, 0.25)(number + 1) +FROM numbers(1000) + +┌─quantileGK(100, 0.25)(plus(number, 1))─┐ +│ 251 │ +└────────────────────────────────────────┘ + +SELECT quantileGK(1000, 0.25)(number + 1) +FROM numbers(1000) + +┌─quantileGK(1000, 0.25)(plus(number, 1))─┐ +│ 249 │ +└─────────────────────────────────────────┘ +``` + + +**See Also** + +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md index 94feca9e69e..4377f2f1b17 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md @@ -18,15 +18,15 @@ Alias: `medianBFloat16` **Arguments** -- `expr` — Column with numeric data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md). +- `expr` — Column with numeric data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md). **Parameters** -- `level` — Level of quantile. Optional. Possible values are in the range from 0 to 1. Default value: 0.5. [Float](../../../sql-reference/data-types/float.md). +- `level` — Level of quantile. Optional. Possible values are in the range from 0 to 1. Default value: 0.5. [Float](../../../sql-reference/data-types/float.md). **Returned value** -- Approximate quantile of the specified level. +- Approximate quantile of the specified level. Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64). @@ -64,5 +64,5 @@ Like `quantileBFloat16` but takes into account the weight of each sequence membe **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md index 26826afd126..446d287f0d2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md @@ -21,19 +21,19 @@ Alias: `medianDeterministic`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. 
At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). -- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occures too often, the function works incorrectly. +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `determinator` — Number whose hash is used instead of a random number generator in the reservoir sampling algorithm to make the result of sampling deterministic. As a determinator you can use any deterministic positive number, for example, a user id or an event id. If the same determinator value occurs too often, the function works incorrectly. **Returned value** -- Approximate quantile of the specified level. +- Approximate quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -64,5 +64,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md index 04fe597a34e..d7d7413c283 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md @@ -23,18 +23,18 @@ Alias: `medianExact`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). 
+- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -82,18 +82,18 @@ Alias: `medianExactLow`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -132,18 +132,18 @@ Alias: `medianExactHigh`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). 
+- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -179,21 +179,21 @@ quantileExactExclusive(level)(expr) **Arguments** -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Parameters** -- `level` — Level of quantile. Optional. Possible values: (0, 1) — bounds not included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). +- `level` — Level of quantile. Optional. Possible values: (0, 1) — bounds not included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -231,21 +231,21 @@ quantileExactInclusive(level)(expr) **Arguments** -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Parameters** -- `level` — Level of quantile. 
Optional. Possible values: [0, 1] — bounds included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). +- `level` — Level of quantile. Optional. Possible values: [0, 1] — bounds included. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). [Float](../../../sql-reference/data-types/float.md). **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -267,5 +267,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md index 16e6438a3bf..34def8d7411 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md @@ -21,19 +21,19 @@ Alias: `medianExactWeighted`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). -- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. 
-- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -64,5 +64,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md index 07fcd187217..41d2627fb7b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantileinterpolatedweighted.md @@ -21,19 +21,19 @@ Alias: `medianInterpolatedWeighted`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). -- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `weight` — Column with weights of sequence members. Weight is a number of value occurrences. **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. 
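+
+Since `weight` is defined as a number of value occurrences, a weighted call can be sanity-checked against the same data with the rows physically repeated. A minimal illustrative sketch (hypothetical inline data; the pairs `(1, 3), (2, 1), (3, 2)` stand for the sequence `[1, 1, 1, 2, 3, 3]`):
+
+``` sql
+-- the weighted form over (value, weight) pairs...
+SELECT quantileInterpolatedWeighted(0.5)(x, w)
+FROM values('x UInt8, w UInt8', (1, 3), (2, 1), (3, 2));
+
+-- ...should match the same aggregation over the expanded sequence with weight 1
+SELECT quantileInterpolatedWeighted(0.5)(x, 1)
+FROM (SELECT arrayJoin([1, 1, 1, 2, 3, 3]) AS x);
+```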
**Example** @@ -64,5 +64,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md index fe22ad7b048..38db39d2eec 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiles.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiles.md @@ -29,21 +29,21 @@ quantilesExactExclusive(level1, level2, ...)(expr) **Arguments** -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Parameters** -- `level` — Levels of quantiles. Possible values: (0, 1) — bounds not included. [Float](../../../sql-reference/data-types/float.md). +- `level` — Levels of quantiles. Possible values: (0, 1) — bounds not included. [Float](../../../sql-reference/data-types/float.md). **Returned value** -- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. +- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. Type of array values: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -81,21 +81,21 @@ quantilesExactInclusive(level1, level2, ...)(expr) **Arguments** -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Parameters** -- `level` — Levels of quantiles. Possible values: [0, 1] — bounds included. [Float](../../../sql-reference/data-types/float.md). +- `level` — Levels of quantiles. Possible values: [0, 1] — bounds included. [Float](../../../sql-reference/data-types/float.md). **Returned value** -- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. +- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. 
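+
+To see how the exclusive and inclusive variants differ at the same levels, they can be run side by side, as in this illustrative sketch (the exclusive variant corresponds to `PERCENTILE.EXC` and the inclusive one to `PERCENTILE.INC`, so the arrays generally differ on small inputs):
+
+``` sql
+SELECT
+    quantilesExactExclusive(0.25, 0.5, 0.75)(number) AS exclusive,
+    quantilesExactInclusive(0.25, 0.5, 0.75)(number) AS inclusive
+FROM numbers(10);
+```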
Type of array values: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -115,25 +115,25 @@ Result: └─────────────────────────────────────────────────────────────────────┘ ``` -## quantilesApprox +## quantilesGK -`quantilesApprox` works similarly with `quantileApprox` but allows us to calculate quantities at different levels simultaneously and returns an array. +`quantilesGK` works similarly with `quantileGK` but allows us to calculate quantities at different levels simultaneously and returns an array. **Syntax** ``` sql -quantilesApprox(accuracy, level1, level2, ...)(expr) +quantilesGK(accuracy, level1, level2, ...)(expr) ``` **Returned value** -- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. +- [Array](../../../sql-reference/data-types/array.md) of quantiles of the specified levels. Type of array values: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. 
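Before the example below, a quick side-by-side sketch of the two exact variants (hypothetical sample): the exclusive form follows the PERCENTILE.EXC convention with levels strictly inside (0, 1), the inclusive form follows PERCENTILE.INC with bounds allowed, so the two diverge most visibly near the tails.

``` sql
-- Same levels over the sample 1..10; only the interpolation convention differs.
SELECT
    quantilesExactExclusive(0.25, 0.5, 0.75)(x) AS excl,
    quantilesExactInclusive(0.25, 0.5, 0.75)(x) AS incl
FROM (SELECT number + 1 AS x FROM numbers(10));
```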
**Example** @@ -141,32 +141,32 @@ Query: ``` sql -SELECT quantilesApprox(1, 0.25, 0.5, 0.75)(number + 1) +SELECT quantilesGK(1, 0.25, 0.5, 0.75)(number + 1) FROM numbers(1000) -┌─quantilesApprox(1, 0.25, 0.5, 0.75)(plus(number, 1))─┐ -│ [1,1,1] │ -└──────────────────────────────────────────────────────┘ +┌─quantilesGK(1, 0.25, 0.5, 0.75)(plus(number, 1))─┐ +│ [1,1,1] │ +└──────────────────────────────────────────────────┘ -SELECT quantilesApprox(10, 0.25, 0.5, 0.75)(number + 1) +SELECT quantilesGK(10, 0.25, 0.5, 0.75)(number + 1) FROM numbers(1000) -┌─quantilesApprox(10, 0.25, 0.5, 0.75)(plus(number, 1))─┐ -│ [156,413,659] │ -└───────────────────────────────────────────────────────┘ +┌─quantilesGK(10, 0.25, 0.5, 0.75)(plus(number, 1))─┐ +│ [156,413,659] │ +└───────────────────────────────────────────────────┘ -SELECT quantilesApprox(100, 0.25, 0.5, 0.75)(number + 1) +SELECT quantilesGK(100, 0.25, 0.5, 0.75)(number + 1) FROM numbers(1000) -┌─quantilesApprox(100, 0.25, 0.5, 0.75)(plus(number, 1))─┐ -│ [251,498,741] │ -└────────────────────────────────────────────────────────┘ +┌─quantilesGK(100, 0.25, 0.5, 0.75)(plus(number, 1))─┐ +│ [251,498,741] │ +└────────────────────────────────────────────────────┘ -SELECT quantilesApprox(1000, 0.25, 0.5, 0.75)(number + 1) +SELECT quantilesGK(1000, 0.25, 0.5, 0.75)(number + 1) FROM numbers(1000) -┌─quantilesApprox(1000, 0.25, 0.5, 0.75)(plus(number, 1))─┐ -│ [249,499,749] │ -└─────────────────────────────────────────────────────────┘ +┌─quantilesGK(1000, 0.25, 0.5, 0.75)(plus(number, 1))─┐ +│ [249,499,749] │ +└─────────────────────────────────────────────────────┘ ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md index 5da37a4832f..796e87b02d8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md @@ -23,18 +23,18 @@ Alias: `medianTDigest`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). **Returned value** -- Approximate quantile of the specified level. +- Approximate quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. 
+- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. **Example** @@ -54,5 +54,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md index e7abe08e39f..7b9addbbdde 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md @@ -27,19 +27,19 @@ Alias: `medianTDigestWeighted`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). -- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `expr` — Expression over the column values resulting in numeric [data types](../../../sql-reference/data-types/index.md#data_types), [Date](../../../sql-reference/data-types/date.md) or [DateTime](../../../sql-reference/data-types/datetime.md). +- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. **Returned value** -- Approximate quantile of the specified level. +- Approximate quantile of the specified level. Type: -- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. -- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. -- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. +- [Float64](../../../sql-reference/data-types/float.md) for numeric data type input. +- [Date](../../../sql-reference/data-types/date.md) if input values have the `Date` type. +- [DateTime](../../../sql-reference/data-types/datetime.md) if input values have the `DateTime` type. 
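A small sketch contrasting the weighted and unweighted t-digest forms (hypothetical query; results are approximate by construction): scaling every weight by the same constant carries no information, so the two medians should come out essentially equal.

``` sql
-- Uniform weights do not shift quantiles; both medians should land near 500.
SELECT
    quantileTDigest(0.5)(number)            AS unweighted,
    quantileTDigestWeighted(0.5)(number, 2) AS uniformly_weighted
FROM numbers(1000);
```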
**Example** @@ -59,5 +59,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md index ead381b4497..b5b1c8a0c01 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md @@ -21,19 +21,19 @@ Alias: `medianTiming`. **Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) over a column values returning a [Float\*](../../../sql-reference/data-types/float.md)-type number. +- `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) over a column values returning a [Float\*](../../../sql-reference/data-types/float.md)-type number. - - If negative values are passed to the function, the behavior is undefined. - - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. + - If negative values are passed to the function, the behavior is undefined. + - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. **Accuracy** The calculation is accurate if: -- Total number of values does not exceed 5670. -- Total number of values exceeds 5670, but the page loading time is less than 1024ms. +- Total number of values does not exceed 5670. +- Total number of values exceeds 5670, but the page loading time is less than 1024ms. Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms. @@ -43,7 +43,7 @@ For calculating page loading time quantiles, this function is more effective and **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: `Float32`. @@ -85,5 +85,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md index f65c6b1e6ec..df483aac01e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md @@ -21,21 +21,21 @@ Alias: `medianTimingWeighted`. 
**Arguments** -- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). +- `level` — Level of quantile. Optional parameter. Constant floating-point number from 0 to 1. We recommend using a `level` value in the range of `[0.01, 0.99]`. Default value: 0.5. At `level=0.5` the function calculates [median](https://en.wikipedia.org/wiki/Median). -- `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) over a column values returning a [Float\*](../../../sql-reference/data-types/float.md)-type number. +- `expr` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) over a column values returning a [Float\*](../../../sql-reference/data-types/float.md)-type number. - If negative values are passed to the function, the behavior is undefined. - If the value is greater than 30,000 (a page loading time of more than 30 seconds), it is assumed to be 30,000. -- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. +- `weight` — Column with weights of sequence elements. Weight is a number of value occurrences. **Accuracy** The calculation is accurate if: -- Total number of values does not exceed 5670. -- Total number of values exceeds 5670, but the page loading time is less than 1024ms. +- Total number of values does not exceed 5670. +- Total number of values exceeds 5670, but the page loading time is less than 1024ms. Otherwise, the result of the calculation is rounded to the nearest multiple of 16 ms. @@ -45,7 +45,7 @@ For calculating page loading time quantiles, this function is more effective and **Returned value** -- Quantile of the specified level. +- Quantile of the specified level. Type: `Float32`. @@ -118,5 +118,5 @@ Result: **See Also** -- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) -- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) +- [median](../../../sql-reference/aggregate-functions/reference/median.md#median) +- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles) diff --git a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md index 231eb2b091b..27f2dd124e4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md +++ b/docs/en/sql-reference/aggregate-functions/reference/rankCorr.md @@ -15,12 +15,12 @@ rankCorr(x, y) **Arguments** -- `x` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64). -- `y` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64). +- `x` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64). +- `y` — Arbitrary value. [Float32](../../../sql-reference/data-types/float.md#float32-float64) or [Float64](../../../sql-reference/data-types/float.md#float32-float64). **Returned value(s)** -- Returns a rank correlation coefficient of the ranks of x and y. The value of the correlation coefficient ranges from -1 to +1. 
If less than two arguments are passed, the function will return an exception. The value close to +1 denotes a high linear relationship, and with an increase of one random variable, the second random variable also increases. The value close to -1 denotes a high linear relationship, and with an increase of one random variable, the second random variable decreases. The value close or equal to 0 denotes no relationship between the two random variables. +- Returns a rank correlation coefficient of the ranks of x and y. The value of the correlation coefficient ranges from -1 to +1. If less than two arguments are passed, the function will return an exception. The value close to +1 denotes a high linear relationship, and with an increase of one random variable, the second random variable also increases. The value close to -1 denotes a high linear relationship, and with an increase of one random variable, the second random variable decreases. The value close or equal to 0 denotes no relationship between the two random variables. Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64). @@ -55,4 +55,4 @@ Result: ``` **See Also** -- [Spearman's rank correlation coefficient](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient) \ No newline at end of file +- [Spearman's rank correlation coefficient](https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient) \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md index a6380b78a79..bcff05ada47 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md @@ -13,8 +13,8 @@ simpleLinearRegression(x, y) Parameters: -- `x` — Column with dependent variable values. -- `y` — Column with explanatory variable values. +- `x` — Column with dependent variable values. +- `y` — Column with explanatory variable values. Returned values: diff --git a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md index 05ea373d4da..e21dad5b2f5 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sparkbar.md @@ -21,18 +21,18 @@ sparkbar(buckets[, min_x, max_x])(x, y) **Parameters** -- `buckets` — The number of segments. Type: [Integer](../../../sql-reference/data-types/int-uint.md). -- `min_x` — The interval start. Optional parameter. -- `max_x` — The interval end. Optional parameter. +- `buckets` — The number of segments. Type: [Integer](../../../sql-reference/data-types/int-uint.md). +- `min_x` — The interval start. Optional parameter. +- `max_x` — The interval end. Optional parameter. **Arguments** -- `x` — The field with values. -- `y` — The field with the frequency of values. +- `x` — The field with values. +- `y` — The field with the frequency of values. **Returned value** -- The frequency histogram. +- The frequency histogram. 
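A minimal `sparkbar` sketch ahead of the example below (hypothetical data): with linearly growing frequencies over nine buckets, the rendered bars should rise from left to right.

``` sql
-- x = 0..8 with frequency y = x + 1: one bucket per value.
SELECT sparkbar(9)(number, number + 1) FROM numbers(9);
```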
**Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md index 8126a80e25e..9481172c25b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md @@ -72,5 +72,5 @@ The query will return a column of predicted values. Note that first argument of **See Also** -- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) -- [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) +- [stochasticLogisticRegression](../../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) +- [Difference between linear and logistic regressions](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) diff --git a/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md index 41eeb70c04f..0a040689681 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md +++ b/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md @@ -52,5 +52,5 @@ stochasticLogisticRegression(1.0, 1.0, 10, 'SGD') **See Also** -- [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md#agg_functions-stochasticlinearregression) -- [Difference between linear and logistic regressions.](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) +- [stochasticLinearRegression](../../../sql-reference/aggregate-functions/reference/stochasticlinearregression.md#agg_functions-stochasticlinearregression) +- [Difference between linear and logistic regressions.](https://stackoverflow.com/questions/12146914/what-is-the-difference-between-linear-regression-and-logistic-regression) diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md index 5ce0c769576..29b43851f44 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md @@ -19,22 +19,22 @@ The null hypothesis is that means of populations are equal. Normal distribution **Arguments** -- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). +- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). **Parameters** -- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). 
+- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). **Returned values** [Tuple](../../../sql-reference/data-types/tuple.md) with two or four elements (if the optional `confidence_level` is specified): -- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). -- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). -- [calculated confidence-interval-low.] [Float64](../../../sql-reference/data-types/float.md). -- [calculated confidence-interval-high.] [Float64](../../../sql-reference/data-types/float.md). +- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). +- [calculated confidence-interval-low.] [Float64](../../../sql-reference/data-types/float.md). +- [calculated confidence-interval-high.] [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -68,5 +68,5 @@ Result: **See Also** -- [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test) -- [welchTTest function](welchttest.md#welchttest) +- [Student's t-test](https://en.wikipedia.org/wiki/Student%27s_t-test) +- [welchTTest function](welchttest.md#welchttest) diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md index b7773b0d09b..a59b87022d6 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumcount.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumcount.md @@ -14,11 +14,11 @@ sumCount(x) **Arguments** -- `x` — Input value, must be [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md). +- `x` — Input value, must be [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md). **Returned value** -- Tuple `(sum, count)`, where `sum` is the sum of numbers and `count` is the number of rows with not-NULL values. +- Tuple `(sum, count)`, where `sum` is the sum of numbers and `count` is the number of rows with not-NULL values. Type: [Tuple](../../../sql-reference/data-types/tuple.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md b/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md index 3e0783e9ad2..1a729b18b42 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sumkahan.md @@ -17,11 +17,11 @@ sumKahan(x) **Arguments** -- `x` — Input value, must be [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md). +- `x` — Input value, must be [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md). 
**Returned value** -- the sum of numbers, with type [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md) depends on type of input arguments +- the sum of numbers, with type [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), or [Decimal](../../../sql-reference/data-types/decimal.md) depends on type of input arguments **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/theilsu.md b/docs/en/sql-reference/aggregate-functions/reference/theilsu.md index f2103d7862b..ef19438a53a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/theilsu.md +++ b/docs/en/sql-reference/aggregate-functions/reference/theilsu.md @@ -16,11 +16,11 @@ theilsU(column1, column2) **Arguments** -- `column1` and `column2` are the columns to be compared +- `column1` and `column2` are the columns to be compared **Returned value** -- a value between -1 and 1 +- a value between -1 and 1 **Return type** is always [Float64](../../../sql-reference/data-types/float.md). diff --git a/docs/en/sql-reference/aggregate-functions/reference/topk.md b/docs/en/sql-reference/aggregate-functions/reference/topk.md index b025f6f6d54..bde29275f79 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topk.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topk.md @@ -19,13 +19,13 @@ We recommend using the `N < 10` value; performance is reduced with large `N` val **Arguments** -- `N` – The number of elements to return. +- `N` – The number of elements to return. If the parameter is omitted, default value 10 is used. **Arguments** -- `x` – The value to calculate frequency. +- `x` – The value to calculate frequency. **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md index 32b234fd6b8..03932e88a6a 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md @@ -15,9 +15,9 @@ topKWeighted(N)(x, weight) **Arguments** -- `N` — The number of elements to return. -- `x` — The value. -- `weight` — The weight. Every value is accounted `weight` times for frequency calculation. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `N` — The number of elements to return. +- `x` — The value. +- `weight` — The weight. Every value is accounted `weight` times for frequency calculation. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** @@ -42,4 +42,4 @@ Result: **See Also** -- [topK](../../../sql-reference/aggregate-functions/reference/topk.md) +- [topK](../../../sql-reference/aggregate-functions/reference/topk.md) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniq.md b/docs/en/sql-reference/aggregate-functions/reference/uniq.md index d72311b3ede..b1c8336630b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniq.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniq.md @@ -17,24 +17,24 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, ` **Returned value** -- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. +- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. **Implementation details** Function: -- Calculates a hash for all parameters in the aggregate, then uses it in calculations. 
+- Calculates a hash for all parameters in the aggregate, then uses it in calculations. -- Uses an adaptive sampling algorithm. For the calculation state, the function uses a sample of element hash values up to 65536. This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions. +- Uses an adaptive sampling algorithm. For the calculation state, the function uses a sample of element hash values up to 65536. This algorithm is very accurate and very efficient on the CPU. When the query contains several of these functions, using `uniq` is almost as fast as using other aggregate functions. -- Provides the result deterministically (it does not depend on the query processing order). +- Provides the result deterministically (it does not depend on the query processing order). We recommend using this function in almost all scenarios. **See Also** -- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) -- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) -- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) -- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) -- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) +- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index f1287c6ff9b..2f3efde859d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -21,19 +21,19 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, ` **Returned value** -- A number [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. +- A number [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. **Implementation details** Function: -- Calculates a hash (64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations. +- Calculates a hash (64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations. -- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table. +- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table. For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. -- Provides the result deterministically (it does not depend on the query processing order). 
+- Provides the result deterministically (it does not depend on the query processing order). :::note Since it uses 32-bit hash for non-`String` type, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) @@ -41,14 +41,14 @@ Since it uses 32-bit hash for non-`String` type, the result will have very high Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined`: -- Consumes several times less memory. -- Calculates with several times higher accuracy. -- Usually has slightly lower performance. In some scenarios, `uniqCombined` can perform better than `uniq`, for example, with distributed queries that transmit a large number of aggregation states over the network. +- Consumes several times less memory. +- Calculates with several times higher accuracy. +- Usually has slightly lower performance. In some scenarios, `uniqCombined` can perform better than `uniq`, for example, with distributed queries that transmit a large number of aggregation states over the network. **See Also** -- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) -- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) -- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) -- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) -- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) +- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md index 901c631b756..fd68a464881 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md @@ -21,7 +21,7 @@ The function takes a variable number of parameters. 
Parameters can be `Tuple`, ` **See Also** -- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) -- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqcombined) -- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqhll12) -- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqcombined) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniqhll12) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md index b598ad23df3..8594ebb3782 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md @@ -17,25 +17,25 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, ` **Returned value** -- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. +- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. **Implementation details** Function: -- Calculates a hash for all parameters in the aggregate, then uses it in calculations. +- Calculates a hash for all parameters in the aggregate, then uses it in calculations. -- Uses the HyperLogLog algorithm to approximate the number of different argument values. +- Uses the HyperLogLog algorithm to approximate the number of different argument values. 2^12 5-bit cells are used. The size of the state is slightly more than 2.5 KB. The result is not very accurate (up to ~10% error) for small data sets (<10K elements). However, the result is fairly accurate for high-cardinality data sets (10K-100M), with a maximum error of ~1.6%. Starting from 100M, the estimation error increases, and the function will return very inaccurate results for data sets with extremely high cardinality (1B+ elements). -- Provides the determinate result (it does not depend on the query processing order). +- Provides the determinate result (it does not depend on the query processing order). We do not recommend using this function. In most cases, use the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) or [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) function. 
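A side-by-side sketch of the estimators discussed above (hypothetical query): `uniqExact` gives the ground truth, `uniq` should stay within a small relative error, and `uniqHLL12` may deviate noticeably, which is why the page recommends against it.

``` sql
SELECT
    uniqExact(number) AS exact,     -- precise, but state grows with cardinality
    uniq(number)      AS adaptive,  -- adaptive sampling, the recommended default
    uniqHLL12(number) AS hll12      -- HyperLogLog, up to ~10% error on small sets
FROM numbers(1000000);
```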
**See Also** -- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) -- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) -- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) -- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md index e2adf672909..45970f144cb 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md @@ -16,24 +16,24 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, ` **Returned value** -- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. +- A [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. **Implementation details** Function: -- Calculates a hash for all parameters in the aggregate, then uses it in calculations. +- Calculates a hash for all parameters in the aggregate, then uses it in calculations. -- Uses the [KMV](https://datasketches.apache.org/docs/Theta/InverseEstimate.html) algorithm to approximate the number of different argument values. +- Uses the [KMV](https://datasketches.apache.org/docs/Theta/InverseEstimate.html) algorithm to approximate the number of different argument values. 4096(2^12) 64-bit sketch are used. The size of the state is about 41 KB. -- The relative error is 3.125% (95% confidence), see the [relative error table](https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html) for detail. +- The relative error is 3.125% (95% confidence), see the [relative error table](https://datasketches.apache.org/docs/Theta/ThetaErrorTable.html) for detail. 
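A sketch of the theta-sketch estimator next to the exact count (hypothetical query, assuming a build compiled with Apache DataSketches support): at this cardinality the estimate should sit within roughly the 3.125% relative error quoted above.

``` sql
SELECT
    uniqTheta(number) AS approx,
    uniqExact(number) AS exact
FROM numbers(100000);
```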
**See Also** -- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) -- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) -- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) -- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) -- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined) +- [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md index ab35f8794e6..4f1085e65b4 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md @@ -19,21 +19,21 @@ The null hypothesis is that means of populations are equal. Normal distribution **Arguments** -- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). -- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). +- `sample_data` — Sample data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md) or [Decimal](../../../sql-reference/data-types/decimal.md). +- `sample_index` — Sample index. [Integer](../../../sql-reference/data-types/int-uint.md). **Parameters** -- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). +- `confidence_level` — Confidence level in order to calculate confidence intervals. [Float](../../../sql-reference/data-types/float.md). **Returned values** [Tuple](../../../sql-reference/data-types/tuple.md) with two or four elements (if the optional `confidence_level` is specified) -- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). -- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). -- calculated confidence-interval-low. [Float64](../../../sql-reference/data-types/float.md). -- calculated confidence-interval-high. [Float64](../../../sql-reference/data-types/float.md). +- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md). +- calculated p-value. [Float64](../../../sql-reference/data-types/float.md). +- calculated confidence-interval-low. [Float64](../../../sql-reference/data-types/float.md). +- calculated confidence-interval-high. [Float64](../../../sql-reference/data-types/float.md). 
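A minimal end-to-end sketch before the example below (hypothetical inline data via the `values` table function): the two samples are tagged by a 0/1 index, and since `confidence_level` is omitted, the result is the two-element tuple (t-statistic, p-value).

``` sql
SELECT welchTTest(v, s)
FROM values('v Float64, s UInt8',
    (20.3, 0), (22.1, 0), (21.9, 0),
    (25.6, 1), (24.3, 1), (26.1, 1));
```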
**Example** @@ -67,5 +67,5 @@ Result: **See Also** -- [Welch's t-test](https://en.wikipedia.org/wiki/Welch%27s_t-test) -- [studentTTest function](studentttest.md#studentttest) +- [Welch's t-test](https://en.wikipedia.org/wiki/Welch%27s_t-test) +- [studentTTest function](studentttest.md#studentttest) diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index d8547f03714..fe6d7ebe0dc 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -12,9 +12,9 @@ Aggregate functions can have an implementation-defined intermediate state that c **Parameters** -- Name of the aggregate function. If the function is parametric, specify its parameters too. +- Name of the aggregate function. If the function is parametric, specify its parameters too. -- Types of the aggregate function arguments. +- Types of the aggregate function arguments. **Example** @@ -63,3 +63,8 @@ SELECT uniqMerge(state) FROM (SELECT uniqState(UserID) AS state FROM table GROUP ## Usage Example See [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md) engine description. + + +## Related Content + +- Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states) diff --git a/docs/en/sql-reference/data-types/date.md b/docs/en/sql-reference/data-types/date.md index d43a00312dd..048466f7ae4 100644 --- a/docs/en/sql-reference/data-types/date.md +++ b/docs/en/sql-reference/data-types/date.md @@ -39,6 +39,6 @@ SELECT * FROM dt; **See Also** -- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) -- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) -- [`DateTime` data type](../../sql-reference/data-types/datetime.md) +- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) +- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) +- [`DateTime` data type](../../sql-reference/data-types/datetime.md) diff --git a/docs/en/sql-reference/data-types/date32.md b/docs/en/sql-reference/data-types/date32.md index 826dc20a96d..7cf8b1b95fe 100644 --- a/docs/en/sql-reference/data-types/date32.md +++ b/docs/en/sql-reference/data-types/date32.md @@ -35,6 +35,6 @@ SELECT * FROM new; **See Also** -- [toDate32](../../sql-reference/functions/type-conversion-functions.md#todate32) -- [toDate32OrZero](../../sql-reference/functions/type-conversion-functions.md#todate32-or-zero) -- [toDate32OrNull](../../sql-reference/functions/type-conversion-functions.md#todate32-or-null) +- [toDate32](../../sql-reference/functions/type-conversion-functions.md#todate32) +- [toDate32OrZero](../../sql-reference/functions/type-conversion-functions.md#todate32-or-zero) +- [toDate32OrNull](../../sql-reference/functions/type-conversion-functions.md#todate32-or-null) diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index b10ceb79d13..059c6acdb9e 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -62,8 +62,8 @@ SELECT * FROM dt; └─────────────────────┴──────────┘ ``` -- When inserting datetime as an integer, it is treated as Unix Timestamp (UTC). `1546300800` represents `'2019-01-01 00:00:00'` UTC. 
However, as `timestamp` column has `Asia/Istanbul` (UTC+3) timezone specified, when outputting as string the value will be shown as `'2019-01-01 03:00:00'` -- When inserting string value as datetime, it is treated as being in column timezone. `'2019-01-01 00:00:00'` will be treated as being in `Asia/Istanbul` timezone and saved as `1546290000`. +- When inserting datetime as an integer, it is treated as Unix Timestamp (UTC). `1546300800` represents `'2019-01-01 00:00:00'` UTC. However, as `timestamp` column has `Asia/Istanbul` (UTC+3) timezone specified, when outputting as string the value will be shown as `'2019-01-01 03:00:00'` +- When inserting string value as datetime, it is treated as being in column timezone. `'2019-01-01 00:00:00'` will be treated as being in `Asia/Istanbul` timezone and saved as `1546290000`. **2.** Filtering on `DateTime` values @@ -137,11 +137,11 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse ## See Also -- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) -- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) -- [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format) -- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) -- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) -- [The `Date` data type](../../sql-reference/data-types/date.md) +- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) +- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) +- [Functions for working with arrays](../../sql-reference/functions/array-functions.md) +- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format) +- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) +- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) +- [The `Date` data type](../../sql-reference/data-types/date.md) diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md index fa3a1eecd46..2d4035831fa 100644 --- a/docs/en/sql-reference/data-types/datetime64.md +++ b/docs/en/sql-reference/data-types/datetime64.md @@ -52,8 +52,8 @@ SELECT * FROM dt; └─────────────────────────┴──────────┘ ``` -- When inserting datetime as an integer, it is treated as an appropriately scaled Unix Timestamp (UTC). `1546300800000` (with precision 3) represents `'2019-01-01 00:00:00'` UTC. However, as `timestamp` column has `Asia/Istanbul` (UTC+3) timezone specified, when outputting as a string the value will be shown as `'2019-01-01 03:00:00'`. 
Inserting datetime as a decimal will treat it similarly as an integer, except the value before the decimal point is the Unix Timestamp up to and including the seconds, and after the decimal point will be treated as the precision. -- When inserting string value as datetime, it is treated as being in column timezone. `'2019-01-01 00:00:00'` will be treated as being in `Asia/Istanbul` timezone and stored as `1546290000000`. +- When inserting datetime as an integer, it is treated as an appropriately scaled Unix Timestamp (UTC). `1546300800000` (with precision 3) represents `'2019-01-01 00:00:00'` UTC. However, as `timestamp` column has `Asia/Istanbul` (UTC+3) timezone specified, when outputting as a string the value will be shown as `'2019-01-01 03:00:00'`. Inserting datetime as a decimal will treat it similarly as an integer, except the value before the decimal point is the Unix Timestamp up to and including the seconds, and after the decimal point will be treated as the precision. +- When inserting string value as datetime, it is treated as being in column timezone. `'2019-01-01 00:00:00'` will be treated as being in `Asia/Istanbul` timezone and stored as `1546290000000`. 2. Filtering on `DateTime64` values @@ -113,12 +113,12 @@ FROM dt; **See Also** -- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) -- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) -- [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format) -- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) -- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) -- [`Date` data type](../../sql-reference/data-types/date.md) -- [`DateTime` data type](../../sql-reference/data-types/datetime.md) +- [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) +- [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) +- [Functions for working with arrays](../../sql-reference/functions/array-functions.md) +- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format) +- [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) +- [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) +- [`Date` data type](../../sql-reference/data-types/date.md) +- [`DateTime` data type](../../sql-reference/data-types/datetime.md) diff --git a/docs/en/sql-reference/data-types/decimal.md b/docs/en/sql-reference/data-types/decimal.md index 7e4cb5ecaac..8df8b2519e3 100644 --- a/docs/en/sql-reference/data-types/decimal.md +++ b/docs/en/sql-reference/data-types/decimal.md @@ -10,8 +10,8 @@ Signed fixed-point numbers that keep precision during add, subtract and multiply ## Parameters -- P - precision. Valid range: \[ 1 : 76 \]. Determines how many decimal digits number can have (including fraction). 
-- S - scale. Valid range: \[ 0 : P \]. Determines how many decimal digits fraction can have. +- P - precision. Valid range: \[ 1 : 76 \]. Determines how many decimal digits number can have (including fraction). +- S - scale. Valid range: \[ 0 : P \]. Determines how many decimal digits fraction can have. Depending on P parameter value Decimal(P, S) is a synonym for: - P from \[ 1 : 9 \] - for Decimal32(S) @@ -21,10 +21,10 @@ Depending on P parameter value Decimal(P, S) is a synonym for: ## Decimal Value Ranges -- Decimal32(S) - ( -1 \* 10^(9 - S), 1 \* 10^(9 - S) ) -- Decimal64(S) - ( -1 \* 10^(18 - S), 1 \* 10^(18 - S) ) -- Decimal128(S) - ( -1 \* 10^(38 - S), 1 \* 10^(38 - S) ) -- Decimal256(S) - ( -1 \* 10^(76 - S), 1 \* 10^(76 - S) ) +- Decimal32(S) - ( -1 \* 10^(9 - S), 1 \* 10^(9 - S) ) +- Decimal64(S) - ( -1 \* 10^(18 - S), 1 \* 10^(18 - S) ) +- Decimal128(S) - ( -1 \* 10^(38 - S), 1 \* 10^(38 - S) ) +- Decimal256(S) - ( -1 \* 10^(76 - S), 1 \* 10^(76 - S) ) For example, Decimal32(4) can contain numbers from -99999.9999 to 99999.9999 with 0.0001 step. @@ -38,16 +38,16 @@ Because modern CPUs do not support 128-bit integers natively, operations on Deci Binary operations on Decimal result in wider result type (with any order of arguments). -- `Decimal64(S1) Decimal32(S2) -> Decimal64(S)` -- `Decimal128(S1) Decimal32(S2) -> Decimal128(S)` -- `Decimal128(S1) Decimal64(S2) -> Decimal128(S)` -- `Decimal256(S1) Decimal<32|64|128>(S2) -> Decimal256(S)` +- `Decimal64(S1) Decimal32(S2) -> Decimal64(S)` +- `Decimal128(S1) Decimal32(S2) -> Decimal128(S)` +- `Decimal128(S1) Decimal64(S2) -> Decimal128(S)` +- `Decimal256(S1) Decimal<32|64|128>(S2) -> Decimal256(S)` Rules for scale: -- add, subtract: S = max(S1, S2). -- multiply: S = S1 + S2. -- divide: S = S1. +- add, subtract: S = max(S1, S2). +- multiply: S = S1 + S2. +- divide: S = S1. For similar operations between Decimal and integers, the result is Decimal of the same size as an argument. @@ -109,5 +109,5 @@ DB::Exception: Can't compare. 
``` **See also** -- [isDecimalOverflow](../../sql-reference/functions/other-functions.md#is-decimal-overflow) -- [countDigits](../../sql-reference/functions/other-functions.md#count-digits) +- [isDecimalOverflow](../../sql-reference/functions/other-functions.md#is-decimal-overflow) +- [countDigits](../../sql-reference/functions/other-functions.md#count-digits) diff --git a/docs/en/sql-reference/data-types/domains/index.md b/docs/en/sql-reference/data-types/domains/index.md index f6f14186dcc..13ec1735d4d 100644 --- a/docs/en/sql-reference/data-types/domains/index.md +++ b/docs/en/sql-reference/data-types/domains/index.md @@ -10,20 +10,20 @@ Domains are special-purpose types that add some extra features atop of existing You can use domains anywhere corresponding base type can be used, for example: -- Create a column of a domain type -- Read/write values from/to domain column -- Use it as an index if a base type can be used as an index -- Call functions with values of domain column +- Create a column of a domain type +- Read/write values from/to domain column +- Use it as an index if a base type can be used as an index +- Call functions with values of domain column ### Extra Features of Domains -- Explicit column type name in `SHOW CREATE TABLE` or `DESCRIBE TABLE` -- Input from human-friendly format with `INSERT INTO domain_table(domain_column) VALUES(...)` -- Output to human-friendly format for `SELECT domain_column FROM domain_table` -- Loading data from an external source in the human-friendly format: `INSERT INTO domain_table FORMAT CSV ...` +- Explicit column type name in `SHOW CREATE TABLE` or `DESCRIBE TABLE` +- Input from human-friendly format with `INSERT INTO domain_table(domain_column) VALUES(...)` +- Output to human-friendly format for `SELECT domain_column FROM domain_table` +- Loading data from an external source in the human-friendly format: `INSERT INTO domain_table FORMAT CSV ...` ### Limitations -- Can’t convert index column of base type to domain type via `ALTER TABLE`. -- Can’t implicitly convert string values into domain values when inserting data from another column or table. -- Domain adds no constrains on stored values. +- Can’t convert index column of base type to domain type via `ALTER TABLE`. +- Can’t implicitly convert string values into domain values when inserting data from another column or table. +- Domain adds no constrains on stored values. diff --git a/docs/en/sql-reference/data-types/enum.md b/docs/en/sql-reference/data-types/enum.md index a685b341414..02e73a0360e 100644 --- a/docs/en/sql-reference/data-types/enum.md +++ b/docs/en/sql-reference/data-types/enum.md @@ -12,8 +12,8 @@ Named values can be declared as `'string' = integer` pairs or `'string'` names . ClickHouse supports: -- 8-bit `Enum`. It can contain up to 256 values enumerated in the `[-128, 127]` range. -- 16-bit `Enum`. It can contain up to 65536 values enumerated in the `[-32768, 32767]` range. +- 8-bit `Enum`. It can contain up to 256 values enumerated in the `[-128, 127]` range. +- 16-bit `Enum`. It can contain up to 65536 values enumerated in the `[-32768, 32767]` range. ClickHouse automatically chooses the type of `Enum` when data is inserted. You can also use `Enum8` or `Enum16` types to be sure in the size of storage. 
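A compact sketch of that behaviour (`t_enum_demo` is a hypothetical table name): the column stores the 8-bit code, `SELECT` returns the name, and a `CAST` exposes the underlying integer.

``` sql
CREATE TABLE t_enum_demo (x Enum('hello' = 1, 'world' = 2)) ENGINE = Memory;
INSERT INTO t_enum_demo VALUES ('hello'), ('world'), ('hello');
SELECT x, CAST(x, 'Int8') AS code FROM t_enum_demo;  -- hello/1, world/2, hello/1
```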
diff --git a/docs/en/sql-reference/data-types/fixedstring.md b/docs/en/sql-reference/data-types/fixedstring.md index e28f957e49c..a56b3fccbc1 100644 --- a/docs/en/sql-reference/data-types/fixedstring.md +++ b/docs/en/sql-reference/data-types/fixedstring.md @@ -20,17 +20,17 @@ The `FixedString` type is efficient when data has the length of precisely `N` by Examples of the values that can be efficiently stored in `FixedString`-typed columns: -- The binary representation of IP addresses (`FixedString(16)` for IPv6). -- Language codes (ru_RU, en_US … ). -- Currency codes (USD, RUB … ). -- Binary representation of hashes (`FixedString(16)` for MD5, `FixedString(32)` for SHA256). +- The binary representation of IP addresses (`FixedString(16)` for IPv6). +- Language codes (ru_RU, en_US … ). +- Currency codes (USD, RUB … ). +- Binary representation of hashes (`FixedString(16)` for MD5, `FixedString(32)` for SHA256). To store UUID values, use the [UUID](../../sql-reference/data-types/uuid.md) data type. When inserting the data, ClickHouse: -- Complements a string with null bytes if the string contains fewer than `N` bytes. -- Throws the `Too large value for FixedString(N)` exception if the string contains more than `N` bytes. +- Complements a string with null bytes if the string contains fewer than `N` bytes. +- Throws the `Too large value for FixedString(N)` exception if the string contains more than `N` bytes. When selecting the data, ClickHouse does not remove the null bytes at the end of the string. If you use the `WHERE` clause, you should add null bytes manually to match the `FixedString` value. The following example illustrates how to use the `WHERE` clause with `FixedString`. diff --git a/docs/en/sql-reference/data-types/float.md b/docs/en/sql-reference/data-types/float.md index c89b24ad235..3b55271f707 100644 --- a/docs/en/sql-reference/data-types/float.md +++ b/docs/en/sql-reference/data-types/float.md @@ -30,19 +30,19 @@ SELECT sumKahan(my_float), sumKahan(my_decimal) FROM float_vs_decimal; Types are equivalent to types of C: -- `Float32` — `float`. -- `Float64` — `double`. +- `Float32` — `float`. +- `Float64` — `double`. Aliases: -- `Float32` — `FLOAT`. -- `Float64` — `DOUBLE`. +- `Float32` — `FLOAT`. +- `Float64` — `DOUBLE`. When creating tables, numeric parameters for floating point numbers can be set (e.g. `FLOAT(12)`, `FLOAT(15, 22)`, `DOUBLE(12)`, `DOUBLE(4, 18)`), but ClickHouse ignores them. ## Using Floating-point Numbers -- Computations with floating-point numbers might produce a rounding error. +- Computations with floating-point numbers might produce a rounding error. @@ -56,15 +56,15 @@ SELECT 1 - 0.9 └─────────────────────┘ ``` -- The result of the calculation depends on the calculation method (the processor type and architecture of the computer system). -- Floating-point calculations might result in numbers such as infinity (`Inf`) and “not-a-number” (`NaN`). This should be taken into account when processing the results of calculations. -- When parsing floating-point numbers from text, the result might not be the nearest machine-representable number. +- The result of the calculation depends on the calculation method (the processor type and architecture of the computer system). +- Floating-point calculations might result in numbers such as infinity (`Inf`) and “not-a-number” (`NaN`). This should be taken into account when processing the results of calculations. 
+- When parsing floating-point numbers from text, the result might not be the nearest machine-representable number. ## NaN and Inf In contrast to standard SQL, ClickHouse supports the following categories of floating-point numbers: -- `Inf` – Infinity. +- `Inf` – Infinity. @@ -78,7 +78,7 @@ SELECT 0.5 / 0 └────────────────┘ ``` -- `-Inf` — Negative infinity. +- `-Inf` — Negative infinity. @@ -92,7 +92,7 @@ SELECT -0.5 / 0 └─────────────────┘ ``` -- `NaN` — Not a number. +- `NaN` — Not a number. diff --git a/docs/en/sql-reference/data-types/int-uint.md b/docs/en/sql-reference/data-types/int-uint.md index b8a55b62e36..b551143d92f 100644 --- a/docs/en/sql-reference/data-types/int-uint.md +++ b/docs/en/sql-reference/data-types/int-uint.md @@ -12,25 +12,25 @@ When creating tables, numeric parameters for integer numbers can be set (e.g. `T ## Int Ranges -- `Int8` — \[-128 : 127\] -- `Int16` — \[-32768 : 32767\] -- `Int32` — \[-2147483648 : 2147483647\] -- `Int64` — \[-9223372036854775808 : 9223372036854775807\] -- `Int128` — \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\] -- `Int256` — \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\] +- `Int8` — \[-128 : 127\] +- `Int16` — \[-32768 : 32767\] +- `Int32` — \[-2147483648 : 2147483647\] +- `Int64` — \[-9223372036854775808 : 9223372036854775807\] +- `Int128` — \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\] +- `Int256` — \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\] Aliases: -- `Int8` — `TINYINT`, `BOOL`, `BOOLEAN`, `INT1`. -- `Int16` — `SMALLINT`, `INT2`. -- `Int32` — `INT`, `INT4`, `INTEGER`. -- `Int64` — `BIGINT`. +- `Int8` — `TINYINT`, `BOOL`, `BOOLEAN`, `INT1`. +- `Int16` — `SMALLINT`, `INT2`. +- `Int32` — `INT`, `INT4`, `INTEGER`. +- `Int64` — `BIGINT`. ## UInt Ranges -- `UInt8` — \[0 : 255\] -- `UInt16` — \[0 : 65535\] -- `UInt32` — \[0 : 4294967295\] -- `UInt64` — \[0 : 18446744073709551615\] -- `UInt128` — \[0 : 340282366920938463463374607431768211455\] -- `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\] +- `UInt8` — \[0 : 255\] +- `UInt16` — \[0 : 65535\] +- `UInt32` — \[0 : 4294967295\] +- `UInt64` — \[0 : 18446744073709551615\] +- `UInt128` — \[0 : 340282366920938463463374607431768211455\] +- `UInt256` — \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\] diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md index 58a99baa09e..7810f4c5324 100644 --- a/docs/en/sql-reference/data-types/lowcardinality.md +++ b/docs/en/sql-reference/data-types/lowcardinality.md @@ -16,7 +16,7 @@ LowCardinality(data_type) **Parameters** -- `data_type` — [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), and numbers excepting [Decimal](../../sql-reference/data-types/decimal.md). `LowCardinality` is not efficient for some data types, see the [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) setting description. 
+- `data_type` — [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), and numbers excepting [Decimal](../../sql-reference/data-types/decimal.md). `LowCardinality` is not efficient for some data types, see the [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) setting description. ## Description @@ -44,19 +44,19 @@ ORDER BY id Settings: -- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size) -- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part) -- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format) -- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) -- [output_format_arrow_low_cardinality_as_dictionary](../../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary) +- [low_cardinality_max_dictionary_size](../../operations/settings/settings.md#low_cardinality_max_dictionary_size) +- [low_cardinality_use_single_dictionary_for_part](../../operations/settings/settings.md#low_cardinality_use_single_dictionary_for_part) +- [low_cardinality_allow_in_native_format](../../operations/settings/settings.md#low_cardinality_allow_in_native_format) +- [allow_suspicious_low_cardinality_types](../../operations/settings/settings.md#allow_suspicious_low_cardinality_types) +- [output_format_arrow_low_cardinality_as_dictionary](../../operations/settings/settings.md#output-format-arrow-low-cardinality-as-dictionary) Functions: -- [toLowCardinality](../../sql-reference/functions/type-conversion-functions.md#tolowcardinality) +- [toLowCardinality](../../sql-reference/functions/type-conversion-functions.md#tolowcardinality) ## Related content -- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/) -- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf) +- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/) +- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). 
[Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf) - Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema) - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index b0659746ba7..0ea183d73d8 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -10,8 +10,8 @@ sidebar_label: Map(key, value) **Parameters** -- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md). -- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md). +- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md). +- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md). To get the value from an `a Map('key', 'value')` column, use `a['key']` syntax. This lookup works now with a linear complexity. 
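+
+As a minimal sketch of the `a['key']` access pattern described above (the table name `table_map` is illustrative):
+
+```sql
+CREATE TABLE table_map (a Map(String, UInt64)) ENGINE = Memory;
+
+INSERT INTO table_map VALUES ({'key1': 1, 'key2': 10});
+
+-- subscript access scans the map keys of each row linearly
+SELECT a['key2'] FROM table_map;
+```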
@@ -106,5 +106,10 @@ Result: **See Also** -- [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function -- [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function +- [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function +- [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function + + +## Related content + +- Blog: [Building an Observability Solution with ClickHouse - Part 2 - Traces](https://clickhouse.com/blog/storing-traces-and-spans-open-telemetry-in-clickhouse) diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 342a0294eb6..517a28576f0 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -9,20 +9,20 @@ The common way to produce an aggregate function value is by calling the aggregat The following aggregate functions are supported: -- [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any) -- [`anyLast`](../../sql-reference/aggregate-functions/reference/anylast.md#anylastx) -- [`min`](../../sql-reference/aggregate-functions/reference/min.md#agg_function-min) -- [`max`](../../sql-reference/aggregate-functions/reference/max.md#agg_function-max) -- [`sum`](../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) -- [`sumWithOverflow`](../../sql-reference/aggregate-functions/reference/sumwithoverflow.md#sumwithoverflowx) -- [`groupBitAnd`](../../sql-reference/aggregate-functions/reference/groupbitand.md#groupbitand) -- [`groupBitOr`](../../sql-reference/aggregate-functions/reference/groupbitor.md#groupbitor) -- [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor) -- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray) -- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md) -- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) -- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap) -- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) +- [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any) +- [`anyLast`](../../sql-reference/aggregate-functions/reference/anylast.md#anylastx) +- [`min`](../../sql-reference/aggregate-functions/reference/min.md#agg_function-min) +- [`max`](../../sql-reference/aggregate-functions/reference/max.md#agg_function-max) +- [`sum`](../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) +- [`sumWithOverflow`](../../sql-reference/aggregate-functions/reference/sumwithoverflow.md#sumwithoverflowx) +- [`groupBitAnd`](../../sql-reference/aggregate-functions/reference/groupbitand.md#groupbitand) +- [`groupBitOr`](../../sql-reference/aggregate-functions/reference/groupbitor.md#groupbitor) +- [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor) +- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray) +- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md) +- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap) +- 
[`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap) +- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap) :::note @@ -33,8 +33,8 @@ Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way **Parameters** -- Name of the aggregate function. -- Types of the aggregate function arguments. +- Name of the aggregate function. +- Types of the aggregate function arguments. **Example** diff --git a/docs/en/sql-reference/data-types/special-data-types/interval.md b/docs/en/sql-reference/data-types/special-data-types/interval.md index 0ac5248e36c..c89c2e78752 100644 --- a/docs/en/sql-reference/data-types/special-data-types/interval.md +++ b/docs/en/sql-reference/data-types/special-data-types/interval.md @@ -14,19 +14,19 @@ The family of data types representing time and date intervals. The resulting typ Structure: -- Time interval as an unsigned integer value. -- Type of an interval. +- Time interval as an unsigned integer value. +- Type of an interval. Supported interval types: -- `SECOND` -- `MINUTE` -- `HOUR` -- `DAY` -- `WEEK` -- `MONTH` -- `QUARTER` -- `YEAR` +- `SECOND` +- `MINUTE` +- `HOUR` +- `DAY` +- `WEEK` +- `MONTH` +- `QUARTER` +- `YEAR` For each interval type, there is a separate data type. For example, the `DAY` interval corresponds to the `IntervalDay` data type: @@ -81,5 +81,5 @@ Code: 43. DB::Exception: Received from localhost:9000. DB::Exception: Wrong argu ## See Also -- [INTERVAL](../../../sql-reference/operators/index.md#operator-interval) operator -- [toInterval](../../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type conversion functions +- [INTERVAL](../../../sql-reference/operators/index.md#operator-interval) operator +- [toInterval](../../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type conversion functions diff --git a/docs/en/sql-reference/data-types/string.md b/docs/en/sql-reference/data-types/string.md index fce16320240..f891a9303e5 100644 --- a/docs/en/sql-reference/data-types/string.md +++ b/docs/en/sql-reference/data-types/string.md @@ -13,7 +13,7 @@ When creating tables, numeric parameters for string fields can be set (e.g. `VAR Aliases: -- `String` — `LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`. +- `String` — `LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`. ## Encodings diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index f697b1ecdcf..48a8ce45d33 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -15,8 +15,8 @@ ClickHouse supports special functions for working with dictionaries that can be ClickHouse supports: -- Dictionaries with a [set of functions](../../sql-reference/functions/ext-dict-functions.md). -- [Embedded dictionaries](#embedded_dictionaries) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md). +- Dictionaries with a [set of functions](../../sql-reference/functions/ext-dict-functions.md). +- [Embedded dictionaries](#embedded_dictionaries) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md). :::tip Tutorial @@ -27,9 +27,9 @@ You can add your own dictionaries from various data sources. The source for a di ClickHouse: -- Fully or partially stores dictionaries in RAM. 
-- Periodically updates dictionaries and dynamically loads missing values. In other words, dictionaries can be loaded dynamically. -- Allows creating dictionaries with xml files or [DDL queries](../../sql-reference/statements/create/dictionary.md). +- Fully or partially stores dictionaries in RAM. +- Periodically updates dictionaries and dynamically loads missing values. In other words, dictionaries can be loaded dynamically. +- Allows creating dictionaries with xml files or [DDL queries](../../sql-reference/statements/create/dictionary.md). The configuration of dictionaries can be located in one or more xml-files. The path to the configuration is specified in the [dictionaries_config](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config) parameter. @@ -37,9 +37,9 @@ Dictionaries can be loaded at server startup or at first use, depending on the [ The [dictionaries](../../operations/system-tables/dictionaries.md#system_tables-dictionaries) system table contains information about dictionaries configured at server. For each dictionary you can find there: -- Status of the dictionary. -- Configuration parameters. -- Metrics like amount of RAM allocated for the dictionary or a number of queries since the dictionary was successfully loaded. +- Status of the dictionary. +- Configuration parameters. +- Metrics like amount of RAM allocated for the dictionary or a number of queries since the dictionary was successfully loaded. @@ -133,13 +133,13 @@ Caching is not recommended because of potentially poor performance and difficult There are several ways to improve dictionary performance: -- Call the function for working with the dictionary after `GROUP BY`. -- Mark attributes to extract as injective. An attribute is called injective if different attribute values correspond to different keys. So when `GROUP BY` uses a function that fetches an attribute value by the key, this function is automatically taken out of `GROUP BY`. +- Call the function for working with the dictionary after `GROUP BY`. +- Mark attributes to extract as injective. An attribute is called injective if different attribute values correspond to different keys. So when `GROUP BY` uses a function that fetches an attribute value by the key, this function is automatically taken out of `GROUP BY`. ClickHouse generates an exception for errors with dictionaries. Examples of errors: -- The dictionary being accessed could not be loaded. -- Error querying a `cached` dictionary. +- The dictionary being accessed could not be loaded. +- Error querying a `cached` dictionary. You can view the list of dictionaries and their statuses in the [system.dictionaries](../../operations/system-tables/dictionaries.md) table. @@ -201,22 +201,22 @@ Configuration example of a composite key (key has one element with [String](../. 
## Ways to Store Dictionaries in Memory -- [flat](#flat) -- [hashed](#hashed) -- [sparse_hashed](#sparse_hashed) -- [complex_key_hashed](#complex_key_hashed) -- [complex_key_sparse_hashed](#complex_key_sparse_hashed) -- [hashed_array](#hashed_array) -- [complex_key_hashed_array](#complex_key_hashed_array) -- [range_hashed](#range_hashed) -- [complex_key_range_hashed](#complex_key_range_hashed) -- [cache](#cache) -- [complex_key_cache](#complex_key_cache) -- [ssd_cache](#ssd_cache) -- [complex_key_ssd_cache](#complex_key_ssd_cache) -- [direct](#direct) -- [complex_key_direct](#complex_key_direct) -- [ip_trie](#ip_trie) +- [flat](#flat) +- [hashed](#hashed) +- [sparse_hashed](#sparse_hashed) +- [complex_key_hashed](#complex_key_hashed) +- [complex_key_sparse_hashed](#complex_key_sparse_hashed) +- [hashed_array](#hashed_array) +- [complex_key_hashed_array](#complex_key_hashed_array) +- [range_hashed](#range_hashed) +- [complex_key_range_hashed](#complex_key_range_hashed) +- [cache](#cache) +- [complex_key_cache](#complex_key_cache) +- [ssd_cache](#ssd_cache) +- [complex_key_ssd_cache](#complex_key_ssd_cache) +- [direct](#direct) +- [complex_key_direct](#complex_key_direct) +- [ip_trie](#ip_trie) ### flat @@ -476,10 +476,10 @@ This function returns the value for the specified `id`s and the date range that Details of the algorithm: -- If the `id` is not found or a range is not found for the `id`, it returns the default value of the attribute's type. -- If there are overlapping ranges and `range_lookup_strategy=min`, it returns a matching range with minimal `range_min`, if several ranges found, it returns a range with minimal `range_max`, if again several ranges found (several ranges had the same `range_min` and `range_max` it returns a random range of them. -- If there are overlapping ranges and `range_lookup_strategy=max`, it returns a matching range with maximal `range_min`, if several ranges found, it returns a range with maximal `range_max`, if again several ranges found (several ranges had the same `range_min` and `range_max` it returns a random range of them. -- If the `range_max` is `NULL`, the range is open. `NULL` is treated as maximal possible value. For the `range_min` `1970-01-01` or `0` (-MAX_INT) can be used as the open value. +- If the `id` is not found or a range is not found for the `id`, it returns the default value of the attribute's type. +- If there are overlapping ranges and `range_lookup_strategy=min`, it returns a matching range with the minimal `range_min`; if several ranges are found, it returns the range with the minimal `range_max`; if again several ranges are found (i.e. several ranges have the same `range_min` and `range_max`), it returns a random range of them. +- If there are overlapping ranges and `range_lookup_strategy=max`, it returns a matching range with the maximal `range_min`; if several ranges are found, it returns the range with the maximal `range_max`; if again several ranges are found (i.e. several ranges have the same `range_min` and `range_max`), it returns a random range of them. +- If the `range_max` is `NULL`, the range is open. `NULL` is treated as the maximal possible value. For `range_min`, `1970-01-01` or `0` (-MAX_INT) can be used as the open value. Configuration example: @@ -919,14 +919,14 @@ In this case, ClickHouse can reload the dictionary earlier if the dictionary con When updating the dictionaries, the ClickHouse server applies different logic depending on the type of [source](#dictionary-sources): -- For a text file, it checks the time of modification.
If the time differs from the previously recorded time, the dictionary is updated. -- For MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`). -- Dictionaries from other sources are updated every time by default. +- For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated. +- For MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`). +- Dictionaries from other sources are updated every time by default. For other sources (ODBC, PostgreSQL, ClickHouse, etc), you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps: -- The dictionary table must have a field that always changes when the source data is updated. -- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field in the settings for the [source](#dictionary-sources). +- The dictionary table must have a field that always changes when the source data is updated. +- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field in the settings for the [source](#dictionary-sources). Example of settings: @@ -953,9 +953,9 @@ For `Cache`, `ComplexKeyCache`, `SSDCache`, and `SSDComplexKeyCache` dictionarie It is also possible for `Flat`, `Hashed`, `ComplexKeyHashed` dictionaries to only request data that was changed after the previous update. If `update_field` is specified as part of the dictionary source configuration, the value of the previous update time in seconds will be added to the data request. Depending on the source type (Executable, HTTP, MySQL, PostgreSQL, ClickHouse, or ODBC), different logic will be applied to `update_field` before requesting data from an external source. -- If the source is HTTP then `update_field` will be added as a query parameter with the last update time as the parameter value. -- If the source is Executable then `update_field` will be added as an executable script argument with the last update time as the argument value. -- If the source is ClickHouse, MySQL, PostgreSQL, ODBC there will be an additional part of `WHERE`, where `update_field` is compared as greater or equal with the last update time. +- If the source is HTTP then `update_field` will be added as a query parameter with the last update time as the parameter value. +- If the source is Executable then `update_field` will be added as an executable script argument with the last update time as the argument value. +- If the source is ClickHouse, MySQL, PostgreSQL, or ODBC, an additional part of `WHERE` will be added, where `update_field` is compared as greater than or equal to the last update time. - By default, this `WHERE`-condition is checked at the highest level of the SQL-Query. Alternatively, the condition can be checked in any other `WHERE`-clause within the query using the `{condition}`-keyword.
Example: ```sql ... @@ -1054,18 +1054,18 @@ SETTINGS(format_csv_allow_single_quotes = 0) Types of sources (`source_type`): -- [Local file](#local_file) -- [Executable File](#executable) -- [Executable Pool](#executable_pool) -- [HTTP(s)](#http) -- DBMS - - [ODBC](#odbc) - - [MySQL](#mysql) - - [ClickHouse](#clickhouse) - - [MongoDB](#mongodb) - - [Redis](#redis) - - [Cassandra](#cassandra) - - [PostgreSQL](#postgresql) +- [Local file](#local_file) +- [Executable File](#executable) +- [Executable Pool](#executable_pool) +- [HTTP(s)](#http) +- DBMS + - [ODBC](#odbc) + - [MySQL](#mysql) + - [ClickHouse](#clickhouse) + - [MongoDB](#mongodb) + - [Redis](#redis) + - [Cassandra](#cassandra) + - [PostgreSQL](#postgresql) ## Local File {#local_file} @@ -1088,14 +1088,14 @@ SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated')) Setting fields: -- `path` – The absolute path to the file. -- `format` – The file format. All the formats described in [Formats](../../interfaces/formats.md#formats) are supported. +- `path` – The absolute path to the file. +- `format` – The file format. All the formats described in [Formats](../../interfaces/formats.md#formats) are supported. When a dictionary with source `FILE` is created via DDL command (`CREATE DICTIONARY ...`), the source file needs to be located in the `user_files` directory to prevent DB users from accessing arbitrary files on the ClickHouse node. **See Also** -- [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function) +- [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function) ## Executable File {#executable} @@ -1115,14 +1115,14 @@ Example of settings: Setting fields: -- `command` — The absolute path to the executable file, or the file name (if the command's directory is in the `PATH`). -- `format` — The file format. All the formats described in [Formats](../../interfaces/formats.md#formats) are supported. -- `command_termination_timeout` — The executable script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file will have `command_termination_timeout` seconds to shutdown before ClickHouse will send a SIGTERM signal to the child process. `command_termination_timeout` is specified in seconds. Default value is 10. Optional parameter. -- `command_read_timeout` - Timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter. -- `command_write_timeout` - Timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter. -- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. -- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using a whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `0`. Optional parameter. -- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`. 
+- `command` — The absolute path to the executable file, or the file name (if the command's directory is in the `PATH`). +- `format` — The file format. All the formats described in [Formats](../../interfaces/formats.md#formats) are supported. +- `command_termination_timeout` — The executable script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file will have `command_termination_timeout` seconds to shutdown before ClickHouse will send a SIGTERM signal to the child process. `command_termination_timeout` is specified in seconds. Default value is 10. Optional parameter. +- `command_read_timeout` - Timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter. +- `command_write_timeout` - Timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter. +- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. +- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using a whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `0`. Optional parameter. +- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`. That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node. @@ -1148,16 +1148,16 @@ Example of settings: Setting fields: -- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`). -- `format` — The file format. All the formats described in “[Formats](../../interfaces/formats.md#formats)” are supported. -- `pool_size` — Size of pool. If 0 is specified as `pool_size` then there is no pool size restrictions. Default value is `16`. -- `command_termination_timeout` — executable script should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have `command_termination_timeout` seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter. -- `max_command_execution_time` — Maximum executable script command execution time for processing block of data. Specified in seconds. Default value is 10. Optional parameter. -- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter. -- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter. -- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. Optional parameter. 
-- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter. -- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`. +- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`). +- `format` — The file format. All the formats described in “[Formats](../../interfaces/formats.md#formats)” are supported. +- `pool_size` — Size of pool. If 0 is specified as `pool_size` then there are no pool size restrictions. Default value is `16`. +- `command_termination_timeout` — The executable script should contain a main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file will have `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is 10. Optional parameter. +- `max_command_execution_time` — Maximum executable script command execution time for processing a block of data. Specified in seconds. Default value is 10. Optional parameter. +- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter. +- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter. +- `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. Optional parameter. +- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter. +- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`. That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node. @@ -1201,15 +1201,15 @@ In order for ClickHouse to access an HTTPS resource, you must [configure openSSL Setting fields: -- `url` – The source URL. -- `format` – The file format. All the formats described in “[Formats](../../interfaces/formats.md#formats)” are supported. -- `credentials` – Basic HTTP authentication. Optional parameter. -- `user` – Username required for the authentication. -- `password` – Password required for the authentication. -- `headers` – All custom HTTP headers entries used for the HTTP request. Optional parameter. -- `header` – Single HTTP header entry. -- `name` – Identifiant name used for the header send on the request. -- `value` – Value set for a specific identifiant name.
+- `url` – The source URL. +- `format` – The file format. All the formats described in “[Formats](../../interfaces/formats.md#formats)” are supported. +- `credentials` – Basic HTTP authentication. Optional parameter. +- `user` – Username required for the authentication. +- `password` – Password required for the authentication. +- `headers` – All custom HTTP headers entries used for the HTTP request. Optional parameter. +- `header` – Single HTTP header entry. +- `name` – Identifier name used for the header sent in the request. +- `value` – Value set for a specific identifier name. When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remote hosts for HTTP dictionaries are checked against the contents of `remote_url_allow_hosts` section from config to prevent database users from accessing arbitrary HTTP servers. @@ -1461,11 +1461,11 @@ SOURCE(ODBC( Setting fields: -- `db` – Name of the database. Omit it if the database name is set in the `<connection_string>` parameters. -- `table` – Name of the table and schema if exists. -- `connection_string` – Connection string. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). -- `query` – The custom query. Optional parameter. +- `db` – Name of the database. Omit it if the database name is set in the `<connection_string>` parameters. +- `table` – Name of the table and schema if exists. +- `connection_string` – Connection string. +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). +- `query` – The custom query. Optional parameter. :::note The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared. @@ -1523,28 +1523,28 @@ SOURCE(MYSQL( Setting fields: -- `port` – The port on the MySQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`). +- `port` – The port on the MySQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`). -- `user` – Name of the MySQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`). +- `user` – Name of the MySQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`). -- `password` – Password of the MySQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`). +- `password` – Password of the MySQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`). -- `replica` – Section of replica configurations. There can be multiple sections. +- `replica` – Section of replica configurations. There can be multiple sections. - `replica/host` – The MySQL host. - `replica/priority` – The replica priority. When attempting to connect, ClickHouse traverses the replicas in order of priority. The lower the number, the higher the priority. -- `db` – Name of the database. +- `db` – Name of the database. -- `table` – Name of the table. +- `table` – Name of the table. -- `where` – The selection criteria. The syntax for conditions is the same as for `WHERE` clause in MySQL, for example, `id > 10 AND id < 20`. Optional parameter. +- `where` – The selection criteria. The syntax for conditions is the same as for `WHERE` clause in MySQL, for example, `id > 10 AND id < 20`. Optional parameter. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter.
Read more in the section [Updating dictionaries](#dictionary-updates). +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). -- `fail_on_connection_loss` – The configuration parameter that controls behavior of the server on connection loss. If `true`, an exception is thrown immediately if the connection between client and server was lost. If `false`, the ClickHouse server retries to execute the query three times before throwing an exception. Note that retrying leads to increased response times. Default value: `false`. +- `fail_on_connection_loss` – The configuration parameter that controls behavior of the server on connection loss. If `true`, an exception is thrown immediately if the connection between client and server was lost. If `false`, the ClickHouse server retries to execute the query three times before throwing an exception. Note that retrying leads to increased response times. Default value: `false`. -- `query` – The custom query. Optional parameter. +- `query` – The custom query. Optional parameter. :::note The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. @@ -1630,16 +1630,16 @@ SOURCE(CLICKHOUSE( Setting fields: -- `host` – The ClickHouse host. If it is a local host, the query is processed without any network activity. To improve fault tolerance, you can create a [Distributed](../../engines/table-engines/special/distributed.md) table and enter it in subsequent configurations. -- `port` – The port on the ClickHouse server. -- `user` – Name of the ClickHouse user. -- `password` – Password of the ClickHouse user. -- `db` – Name of the database. -- `table` – Name of the table. -- `where` – The selection criteria. May be omitted. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). -- `secure` - Use ssl for connection. -- `query` – The custom query. Optional parameter. +- `host` – The ClickHouse host. If it is a local host, the query is processed without any network activity. To improve fault tolerance, you can create a [Distributed](../../engines/table-engines/special/distributed.md) table and enter it in subsequent configurations. +- `port` – The port on the ClickHouse server. +- `user` – Name of the ClickHouse user. +- `password` – Password of the ClickHouse user. +- `db` – Name of the database. +- `table` – Name of the table. +- `where` – The selection criteria. May be omitted. +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). +- `secure` - Use ssl for connection. +- `query` – The custom query. Optional parameter. :::note The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. @@ -1677,12 +1677,12 @@ SOURCE(MONGODB( Setting fields: -- `host` – The MongoDB host. -- `port` – The port on the MongoDB server. -- `user` – Name of the MongoDB user. -- `password` – Password of the MongoDB user. -- `db` – Name of the database. -- `collection` – Name of the collection. +- `host` – The MongoDB host. +- `port` – The port on the MongoDB server. +- `user` – Name of the MongoDB user. +- `password` – Password of the MongoDB user. +- `db` – Name of the database. +- `collection` – Name of the collection. 
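+
+For illustration, a hedged sketch of an equivalent DDL definition using the fields above (all host, credential, and name values are placeholders):
+
+```sql
+CREATE DICTIONARY mongo_dict
+(
+    id UInt64,
+    value String
+)
+PRIMARY KEY id
+SOURCE(MONGODB(host 'localhost' port 27017 user '' password '' db 'test' collection 'dict_source'))
+LAYOUT(FLAT())
+LIFETIME(MIN 300 MAX 360);
+```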
### Redis @@ -1712,10 +1712,10 @@ SOURCE(REDIS( Setting fields: -- `host` – The Redis host. -- `port` – The port on the Redis server. -- `storage_type` – The structure of internal Redis storage using for work with keys. `simple` is for simple sources and for hashed single key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex key are unsupported. May be omitted, default value is `simple`. -- `db_index` – The specific numeric index of Redis logical database. May be omitted, default value is 0. +- `host` – The Redis host. +- `port` – The port on the Redis server. +- `storage_type` – The structure of internal Redis storage used for working with keys. `simple` is for simple sources and for hashed single key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex key are unsupported. May be omitted, default value is `simple`. +- `db_index` – The specific numeric index of Redis logical database. May be omitted, default value is 0. ### Cassandra @@ -1742,18 +1742,18 @@ Example of settings: Setting fields: -- `host` – The Cassandra host or comma-separated list of hosts. -- `port` – The port on the Cassandra servers. If not specified, default port 9042 is used. -- `user` – Name of the Cassandra user. -- `password` – Password of the Cassandra user. -- `keyspace` – Name of the keyspace (database). -- `column_family` – Name of the column family (table). -- `allow_filering` – Flag to allow or not potentially expensive conditions on clustering key columns. Default value is 1. -- `partition_key_prefix` – Number of partition key columns in primary key of the Cassandra table. Required for compose key dictionaries. Order of key columns in the dictionary definition must be the same as in Cassandra. Default value is 1 (the first key column is a partition key and other key columns are clustering key). -- `consistency` – Consistency level. Possible values: `One`, `Two`, `Three`, `All`, `EachQuorum`, `Quorum`, `LocalQuorum`, `LocalOne`, `Serial`, `LocalSerial`. Default value is `One`. -- `where` – Optional selection criteria. -- `max_threads` – The maximum number of threads to use for loading data from multiple partitions in compose key dictionaries. -- `query` – The custom query. Optional parameter. +- `host` – The Cassandra host or comma-separated list of hosts. +- `port` – The port on the Cassandra servers. If not specified, default port 9042 is used. +- `user` – Name of the Cassandra user. +- `password` – Password of the Cassandra user. +- `keyspace` – Name of the keyspace (database). +- `column_family` – Name of the column family (table). +- `allow_filering` – Flag to allow or disallow potentially expensive conditions on clustering key columns. Default value is 1. +- `partition_key_prefix` – Number of partition key columns in primary key of the Cassandra table. Required for composite key dictionaries. Order of key columns in the dictionary definition must be the same as in Cassandra. Default value is 1 (the first key column is a partition key and other key columns are clustering key). +- `consistency` – Consistency level. Possible values: `One`, `Two`, `Three`, `All`, `EachQuorum`, `Quorum`, `LocalQuorum`, `LocalOne`, `Serial`, `LocalSerial`. Default value is `One`. +- `where` – Optional selection criteria. +- `max_threads` – The maximum number of threads to use for loading data from multiple partitions in composite key dictionaries. +- `query` – The custom query. Optional parameter.
:::note The `column_family` or `where` fields cannot be used together with the `query` field. And either one of the `column_family` or `query` fields must be declared. @@ -1798,19 +1798,19 @@ SOURCE(POSTGRESQL( Setting fields: -- `host` – The host on the PostgreSQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`). -- `port` – The port on the PostgreSQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`). -- `user` – Name of the PostgreSQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`). -- `password` – Password of the PostgreSQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`). -- `replica` – Section of replica configurations. There can be multiple sections: - - `replica/host` – The PostgreSQL host. - - `replica/port` – The PostgreSQL port. - - `replica/priority` – The replica priority. When attempting to connect, ClickHouse traverses the replicas in order of priority. The lower the number, the higher the priority. -- `db` – Name of the database. -- `table` – Name of the table. -- `where` – The selection criteria. The syntax for conditions is the same as for `WHERE` clause in PostgreSQL. For example, `id > 10 AND id < 20`. Optional parameter. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). -- `query` – The custom query. Optional parameter. +- `host` – The host on the PostgreSQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`). +- `port` – The port on the PostgreSQL server. You can specify it for all replicas, or for each one individually (inside `<replica>`). +- `user` – Name of the PostgreSQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`). +- `password` – Password of the PostgreSQL user. You can specify it for all replicas, or for each one individually (inside `<replica>`). +- `replica` – Section of replica configurations. There can be multiple sections: + - `replica/host` – The PostgreSQL host. + - `replica/port` – The PostgreSQL port. + - `replica/priority` – The replica priority. When attempting to connect, ClickHouse traverses the replicas in order of priority. The lower the number, the higher the priority. +- `db` – Name of the database. +- `table` – Name of the table. +- `where` – The selection criteria. The syntax for conditions is the same as for `WHERE` clause in PostgreSQL. For example, `id > 10 AND id < 20`. Optional parameter. +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). +- `query` – The custom query. Optional parameter. :::note The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. @@ -1860,8 +1860,8 @@ XML description: Attributes are described in the elements: -- `<id>` — Key column -- `<attribute>` — Data column: there can be a multiple number of attributes. +- `<id>` — Key column +- `<attribute>` — Data column: there can be multiple attributes. DDL query: @@ -1876,15 +1876,15 @@ PRIMARY KEY Id Attributes are described in the query body: -- `PRIMARY KEY` — Key column -- `AttrName AttrType` — Data column. There can be a multiple number of attributes. +- `PRIMARY KEY` — Key column +- `AttrName AttrType` — Data column. There can be multiple attributes.
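+
+Putting the key and attribute declarations together, a minimal sketch of a complete DDL definition (the dictionary, source table, and attribute names are hypothetical):
+
+```sql
+CREATE DICTIONARY example_dict
+(
+    Id UInt64,
+    Name String DEFAULT '',
+    Price Float64 DEFAULT 0
+)
+PRIMARY KEY Id
+SOURCE(CLICKHOUSE(TABLE 'dict_source'))
+LAYOUT(HASHED())
+LIFETIME(MIN 300 MAX 360);
+```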
## Key ClickHouse supports the following types of keys: -- Numeric key. `UInt64`. Defined in the `<id>` tag or using `PRIMARY KEY` keyword. -- Composite key. Set of values of different types. Defined in the tag `<key>` or `PRIMARY KEY` keyword. +- Numeric key. `UInt64`. Defined in the `<id>` tag or using `PRIMARY KEY` keyword. +- Composite key. Set of values of different types. Defined in the tag `<key>` or `PRIMARY KEY` keyword. An xml structure can contain either `<id>` or `<key>`. DDL-query must contain single `PRIMARY KEY`. @@ -1906,7 +1906,7 @@ Configuration example: Configuration fields: -- `name` – The name of the column with keys. +- `name` – The name of the column with keys. For DDL-query: @@ -1919,7 +1919,7 @@ PRIMARY KEY Id ... ``` -- `PRIMARY KEY` – The name of the column with keys. +- `PRIMARY KEY` – The name of the column with keys. ### Composite Key @@ -2118,8 +2118,8 @@ LAYOUT(POLYGON(STORE_POLYGON_KEY_COLUMN 1)) When configuring the polygon dictionary, the key must have one of two types: -- A simple polygon. It is an array of points. -- MultiPolygon. It is an array of polygons. Each polygon is a two-dimensional array of points. The first element of this array is the outer boundary of the polygon, and subsequent elements specify areas to be excluded from it. +- A simple polygon. It is an array of points. +- MultiPolygon. It is an array of polygons. Each polygon is a two-dimensional array of points. The first element of this array is the outer boundary of the polygon, and subsequent elements specify areas to be excluded from it. Points can be specified as an array or a tuple of their coordinates. In the current implementation, only two-dimensional points are supported. @@ -2127,17 +2127,17 @@ The user can upload their own data in all formats supported by ClickHouse. There are 3 types of [in-memory storage](#storig-dictionaries-in-memory) available: -- `POLYGON_SIMPLE`. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes. +- `POLYGON_SIMPLE`. This is a naive implementation, where a linear pass through all polygons is made for each query, and membership is checked for each one without using additional indexes. -- `POLYGON_INDEX_EACH`. A separate index is built for each polygon, which allows you to quickly check whether it belongs in most cases (optimized for geographical regions). +- `POLYGON_INDEX_EACH`. A separate index is built for each polygon, which allows you to quickly check whether it belongs in most cases (optimized for geographical regions). Also, a grid is superimposed on the area under consideration, which significantly narrows the number of polygons under consideration. The grid is created by recursively dividing the cell into 16 equal parts and is configured with two parameters. The division stops when the recursion depth reaches `MAX_DEPTH` or when the cell crosses no more than `MIN_INTERSECTIONS` polygons. To respond to the query, there is a corresponding cell, and the index for the polygons stored in it is accessed alternately. -- `POLYGON_INDEX_CELL`. This placement also creates the grid described above. The same options are available. For each sheet cell, an index is built on all pieces of polygons that fall into it, which allows you to quickly respond to a request. +- `POLYGON_INDEX_CELL`. This placement also creates the grid described above. The same options are available.
For each sheet cell, an index is built on all pieces of polygons that fall into it, which allows you to quickly respond to a request. +- `POLYGON_INDEX_CELL`. This placement also creates the grid described above. The same options are available. For each sheet cell, an index is built on all pieces of polygons that fall into it, which allows you to quickly respond to a request. -- `POLYGON`. Synonym to `POLYGON_INDEX_CELL`. +- `POLYGON`. Synonym for `POLYGON_INDEX_CELL`. Dictionary queries are carried out using standard [functions](../../sql-reference/functions/ext-dict-functions.md) for working with dictionaries. An important difference is that here the keys will be the points for which you want to find the polygon containing them. @@ -2272,10 +2272,10 @@ ClickHouse contains a built-in feature for working with a geobase. This allows you to: -- Use a region’s ID to get its name in the desired language. -- Use a region’s ID to get the ID of a city, area, federal district, country, or continent. -- Check whether a region is part of another region. -- Get a chain of parent regions. +- Use a region’s ID to get its name in the desired language. +- Use a region’s ID to get the ID of a city, area, federal district, country, or continent. +- Check whether a region is part of another region. +- Get a chain of parent regions. All the functions support “translocality,” the ability to simultaneously use different perspectives on region ownership. For more information, see the section “Functions for working with web analytics dictionaries”. @@ -2292,15 +2292,15 @@ You can also create these files yourself. The file format is as follows: `regions_hierarchy*.txt`: TabSeparated (no header), columns: -- region ID (`UInt32`) -- parent region ID (`UInt32`) -- region type (`UInt8`): 1 - continent, 3 - country, 4 - federal district, 5 - region, 6 - city; other types do not have values -- population (`UInt32`) — optional column +- region ID (`UInt32`) +- parent region ID (`UInt32`) +- region type (`UInt8`): 1 - continent, 3 - country, 4 - federal district, 5 - region, 6 - city; other types do not have values +- population (`UInt32`) — optional column `regions_names_*.txt`: TabSeparated (no header), columns: -- region ID (`UInt32`) -- region name (`String`) — Can’t contain tabs or line feeds, even escaped ones. +- region ID (`UInt32`) +- region name (`String`) — Can’t contain tabs or line feeds, even escaped ones. A flat array is used for storing in RAM. For this reason, IDs shouldn’t be more than a million. @@ -2321,10 +2321,10 @@ ClickHouse contains a built-in feature for working with a geobase. This allows you to: -- Use a region’s ID to get its name in the desired language. -- Use a region’s ID to get the ID of a city, area, federal district, country, or continent. -- Check whether a region is part of another region. -- Get a chain of parent regions. +- Use a region’s ID to get its name in the desired language. +- Use a region’s ID to get the ID of a city, area, federal district, country, or continent. +- Check whether a region is part of another region. +- Get a chain of parent regions. All the functions support “translocality,” the ability to simultaneously use different perspectives on region ownership. For more information, see the section “Functions for working with web analytics dictionaries”. @@ -2341,15 +2341,15 @@ You can also create these files yourself.
The file format is as follows: `regions_hierarchy*.txt`: TabSeparated (no header), columns: -- region ID (`UInt32`) -- parent region ID (`UInt32`) -- region type (`UInt8`): 1 - continent, 3 - country, 4 - federal district, 5 - region, 6 - city; other types do not have values -- population (`UInt32`) — optional column +- region ID (`UInt32`) +- parent region ID (`UInt32`) +- region type (`UInt8`): 1 - continent, 3 - country, 4 - federal district, 5 - region, 6 - city; other types do not have values +- population (`UInt32`) — optional column `regions_names_*.txt`: TabSeparated (no header), columns: -- region ID (`UInt32`) -- region name (`String`) — Can’t contain tabs or line feeds, even escaped ones. +- region ID (`UInt32`) +- region name (`String`) — Can’t contain tabs or line feeds, even escaped ones. A flat array is used for storing in RAM. For this reason, IDs shouldn’t be more than a million. diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index c5244cf62e3..64fae0e82f0 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -1,12 +1,14 @@ --- slug: /en/sql-reference/functions/arithmetic-functions -sidebar_position: 34 +sidebar_position: 5 sidebar_label: Arithmetic --- # Arithmetic Functions -For all arithmetic functions, the result type is calculated as the smallest number type that the result fits in, if there is such a type. The minimum is taken simultaneously based on the number of bits, whether it is signed, and whether it floats. If there are not enough bits, the highest bit type is taken. +The result type of all arithmetic functions is the smallest type which can represent all possible results. Size promotion happens for integers up to 32 bit, e.g. `UInt8 + UInt16 = UInt32`. If one of the integers has 64 or more bits, the result is of the same type as the bigger of the input integers, e.g. `UInt16 + UInt128 = UInt128`. While this introduces a risk of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of 64 bits. + +The result of addition or multiplication of two integers is unsigned unless one of the integers is signed. Example: @@ -20,39 +22,78 @@ SELECT toTypeName(0), toTypeName(0 + 0), toTypeName(0 + 0 + 0), toTypeName(0 + 0 └───────────────┴────────────────────────┴─────────────────────────────────┴──────────────────────────────────────────┘ ``` -Arithmetic functions work for any pair of types from UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, or Float64. +Arithmetic functions work for any pair of `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64` values. -Overflow is produced the same way as in C++. +Overflows are produced the same way as in C++. -## plus(a, b), a + b operator +## plus -Calculates the sum of the numbers. -You can also add integer numbers with a date or date and time. In the case of a date, adding an integer means adding the corresponding number of days. For a date with time, it means adding the corresponding number of seconds. +Calculates the sum of two values `a` and `b`. -## minus(a, b), a - b operator +**Syntax** -Calculates the difference. The result is always signed. +```sql +plus(a, b) +``` -You can also calculate integer numbers from a date or date with time. The idea is the same – see above for ‘plus’.
+It is possible to add an integer and a date or date with time. The former operation increments the number of days in the date, the latter operation increments the number of seconds in the date with time. -## multiply(a, b), a \* b operator +Alias: `a + b` (operator) -Calculates the product of the numbers. +## minus -## divide(a, b), a / b operator +Calculates the difference of two values `a` and `b`. The result is always signed. -Calculates the quotient of the numbers. The result type is always a floating-point type. -It is not integer division. For integer division, use the ‘intDiv’ function. -When dividing by zero you get ‘inf’, ‘-inf’, or ‘nan’. +Similar to `plus`, it is possible to subtract an integer from a date or date with time. -## intDiv(a, b) +**Syntax** -Calculates the quotient of the numbers. Divides into integers, rounding down (by the absolute value). +```sql +minus(a, b) +``` -Returns an integer of the type of the dividend (the first parameter). +Alias: `a - b` (operator) + +## multiply + +Calculates the product of two values `a` and `b`. + +**Syntax** + +```sql +multiply(a, b) +``` + +Alias: `a * b` (operator) + +## divide + +Calculates the quotient of two values `a` and `b`. The result is always a floating-point value. If you need integer division, you can use the `intDiv` function. + +Division by 0 returns `inf`, `-inf`, or `nan`. + +**Syntax** + +```sql +divide(a, b) +``` + +Alias: `a / b` (operator) + +## intDiv + +Performs an integer division of two values `a` by `b`, i.e. computes the quotient rounded down to the next smallest integer. + +The result has the same type as the dividend (the first parameter). An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one. +**Syntax** + +```sql +intDiv(a, b) +``` + **Example** Query: @@ -62,6 +103,7 @@ SELECT intDiv(toFloat64(1), 0.001) AS res, toTypeName(res) ``` + ```response ┌──res─┬─toTypeName(intDiv(toFloat64(1), 0.001))─┐ │ 1000 │ Int64 │ @@ -73,30 +115,65 @@ SELECT intDiv(1, 0.001) AS res, toTypeName(res) ``` + ```response Received exception from server (version 23.2.1): Code: 153. DB::Exception: Received from localhost:9000. DB::Exception: Cannot perform integer division, because it will produce infinite or too large number: While processing intDiv(1, 0.001) AS res, toTypeName(res). (ILLEGAL_DIVISION) ``` -## intDivOrZero(a, b) +## intDivOrZero -Differs from ‘intDiv’ in that it returns zero when dividing by zero or when dividing a minimal negative number by minus one. +Same as `intDiv` but returns zero when dividing by zero or when dividing a minimal negative number by minus one. -## modulo(a, b), a % b operator +**Syntax** + +```sql +intDivOrZero(a, b) +``` + +## modulo + +Calculates the remainder of the division of two values `a` by `b`. -Calculates the remainder when dividing `a` by `b`. The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result is a floating-point number. + The remainder is computed like in C++. Truncated division is used for negative numbers. + An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one. -## moduloOrZero(a, b) +**Syntax** -Differs from [modulo](#modulo) in that it returns zero when the divisor is zero. +```sql +modulo(a, b) +``` -## positiveModulo(a, b), positive_modulo(a, b), pmod(a, b) -Calculates the remainder when dividing `a` by `b`.
Similar to the function `modulo` except that `positive_modulo` always returns a non-negative number. +Alias: `a % b` (operator) -Notice that `positive_modulo` is 4-5 times slower than `modulo`. You should not use `positive_modulo` unless you want to get a positive result and don't care about performance too much. +## moduloOrZero + +Like [modulo](#modulo) but returns zero when the divisor is zero. + +**Syntax** + +```sql +moduloOrZero(a, b) +``` + +## positiveModulo + +Like [modulo](#modulo) but always returns a non-negative number. + +This function is 4-5 times slower than `modulo`. + +**Syntax** + +```sql +positiveModulo(a, b) +``` + +Aliases: +- `positive_modulo(a, b)` +- `pmod(a, b)` **Example** @@ -108,51 +185,67 @@ SELECT positiveModulo(-1, 10) Result: -```text - +```result ┌─positiveModulo(-1, 10)─┐ │ 9 │ └────────────────────────┘ ``` -## negate(a), -a operator +## negate -Calculates a number with the reverse sign. The result is always signed. - -## abs(a) - -Calculates the absolute value of the number (a). That is, if a \< 0, it returns -a. For unsigned types it does not do anything. For signed integer types, it returns an unsigned number. - -## gcd(a, b) - -Returns the greatest common divisor of the numbers. -An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one. - -## lcm(a, b) - -Returns the least common multiple of the numbers. -An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one. - -## max2 - -Compares two values and returns the maximum. The returned value is converted to [Float64](../../sql-reference/data-types/float.md). +Negates a value `a`. The result is always signed. **Syntax** ```sql -max2(value1, value2) +negate(a) ``` -**Arguments** +Alias: `-a` -- `value1` — First value.
[Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). -- `value2` — Second value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). +## abs -**Returned value** +Calculates the absolute value of `a`. Has no effect if `a` is of an unsigned type. If `a` is of a signed type, it returns an unsigned number. -- The maximum of two values. +**Syntax** -Type: [Float](../../sql-reference/data-types/float.md). +```sql +abs(a) +``` + +## gcd + +Returns the greatest common divisor of two values `a` and `b`. + +An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one. + +**Syntax** + +```sql +gcd(a, b) +``` + +## lcm + +Returns the least common multiple of two values `a` and `b`. + +An exception is thrown when dividing by zero or when dividing a minimal negative number by minus one. + +**Syntax** + +```sql +lcm(a, b) +``` + +## max2 + +Returns the bigger of two values `a` and `b`. The returned value is of type [Float64](../../sql-reference/data-types/float.md). + +**Syntax** + +```sql +max2(a, b) +``` **Example** @@ -164,7 +257,7 @@ SELECT max2(-1, 2); Result: -```text +```result ┌─max2(-1, 2)─┐ │ 2 │ └─────────────┘ @@ -172,25 +265,14 @@ Result: ## min2 -Compares two values and returns the minimum. The returned value is converted to [Float64](../../sql-reference/data-types/float.md). +Returns the smaller of two values `a` and `b`. The returned value is of type [Float64](../../sql-reference/data-types/float.md). **Syntax** ```sql -min2(value1, value2) +min2(a, b) ``` -**Arguments** - -- `value1` — First value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). -- `value2` — Second value. [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). - -**Returned value** - -- The minimum of two values. - -Type: [Float](../../sql-reference/data-types/float.md). - **Example** Query: @@ -201,21 +283,19 @@ SELECT min2(-1, 2); Result: -```text +```result ┌─min2(-1, 2)─┐ │ -1 │ └─────────────┘ ``` -## multiplyDecimal(a, b[, result_scale]) +## multiplyDecimal -Performs multiplication on two decimals. Result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md). -Result scale can be explicitly specified by `result_scale` argument (const Integer in range `[0, 76]`). If not specified, the result scale is the max scale of given arguments. +Multiplies two decimals `a` and `b`. The result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md). -:::note -These functions work significantly slower than usual `multiply`. -In case you don't really need controlled precision and/or need fast computation, consider using [multiply](#multiply) -::: +The scale of the result can be explicitly specified by `result_scale`. If `result_scale` is not specified, it is assumed to be the maximum scale of the input values. + +This function works significantly slower than usual `multiply`. In case no control over the result precision is needed and/or fast computation is desired, consider using `multiply`. **Syntax** @@ -225,31 +305,34 @@ multiplyDecimal(a, b[, result_scale]) **Arguments** -- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md). -- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md). -- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md). +- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md). +- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md). +- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md). **Returned value** -- The result of multiplication with given scale. +- The result of multiplication with given scale. Type: [Decimal256](../../sql-reference/data-types/decimal.md). **Example** -```text +```result ┌─multiplyDecimal(toDecimal256(-12, 0), toDecimal32(-2.1, 1), 1)─┐ │ 25.2 │ └────────────────────────────────────────────────────────────────┘ ``` -**Difference from regular multiplication:** +**Differences compared to regular multiplication:** + ```sql SELECT toDecimal64(-12.647, 3) * toDecimal32(2.1239, 4); SELECT toDecimal64(-12.647, 3) as a, toDecimal32(2.1239, 4) as b, multiplyDecimal(a, b); ``` -```text +Result: + +```result ┌─multiply(toDecimal64(-12.647, 3), toDecimal32(2.1239, 4))─┐ │ -26.8609633 │ └───────────────────────────────────────────────────────────┘ @@ -270,7 +353,9 @@ SELECT a * b; ``` -```text +Result: + +```result ┌─────────────a─┬─────────────b─┬─multiplyDecimal(toDecimal64(-12.647987876, 9), toDecimal64(123.967645643, 9))─┐ │ -12.647987876 │ 123.967645643 │ -1567.941279108 │ └───────────────┴───────────────┴───────────────────────────────────────────────────────────────────────────────┘ @@ -279,15 +364,14 @@ Received exception from server (version 22.11.1): Code: 407. DB::Exception: Received from localhost:9000. DB::Exception: Decimal math overflow: While processing toDecimal64(-12.647987876, 9) AS a, toDecimal64(123.967645643, 9) AS b, a * b.
(DECIMAL_OVERFLOW) ``` -## divideDecimal(a, b[, result_scale]) +## divideDecimal -Performs division on two decimals. Result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md). -Result scale can be explicitly specified by `result_scale` argument (const Integer in range `[0, 76]`). If not specified, the result scale is the max scale of given arguments. -:::note -These function work significantly slower than usual `divide`. -In case you don't really need controlled precision and/or need fast computation, consider using [divide](#divide). -::: +Divides two decimals `a` and `b`. The result value will be of type [Decimal256](../../sql-reference/data-types/decimal.md). + +The scale of the result can be explicitly specified by `result_scale`. If `result_scale` is not specified, it is assumed to be the maximum scale of the input values. + +This function works significantly slower than usual `divide`. In case no control over the result precision is needed and/or fast computation is desired, consider using `divide`. **Syntax** @@ -297,31 +381,34 @@ divideDecimal(a, b[, result_scale]) **Arguments** -- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md). -- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md). -- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md). +- `a` — First value: [Decimal](../../sql-reference/data-types/decimal.md). +- `b` — Second value: [Decimal](../../sql-reference/data-types/decimal.md). +- `result_scale` — Scale of result: [Int/UInt](../../sql-reference/data-types/int-uint.md). **Returned value** -- The result of division with given scale. +- The result of division with given scale. Type: [Decimal256](../../sql-reference/data-types/decimal.md). **Example** -```text +```result ┌─divideDecimal(toDecimal256(-12, 0), toDecimal32(2.1, 1), 10)─┐ │ -5.7142857142 │ └──────────────────────────────────────────────────────────────┘ ``` -**Difference from regular division:** +**Differences compared to regular division:** + ```sql SELECT toDecimal64(-12, 1) / toDecimal32(2.1, 1); SELECT toDecimal64(-12, 1) as a, toDecimal32(2.1, 1) as b, divideDecimal(a, b, 1), divideDecimal(a, b, 5); ``` -```text +Result: + +```result ┌─divide(toDecimal64(-12, 1), toDecimal32(2.1, 1))─┐ │ -5.7 │ └──────────────────────────────────────────────────┘ @@ -336,7 +423,9 @@ SELECT toDecimal64(-12, 0) / toDecimal32(2.1, 1); SELECT toDecimal64(-12, 0) as a, toDecimal32(2.1, 1) as b, divideDecimal(a, b, 1), divideDecimal(a, b, 5); ``` -```text +Result: + +```result DB::Exception: Decimal result's scale is less than argument's one: While processing toDecimal64(-12, 0) / toDecimal32(2.1, 1). (ARGUMENT_OUT_OF_BOUND) ┌───a─┬───b─┬─divideDecimal(toDecimal64(-12, 0), toDecimal32(2.1, 1), 1)─┬─divideDecimal(toDecimal64(-12, 0), toDecimal32(2.1, 1), 5)─┐ diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 9d2f89c1837..3e70f94a0d2 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/array-functions -sidebar_position: 35 +sidebar_position: 10 sidebar_label: Arrays --- @@ -18,7 +18,7 @@ empty([x]) An array is considered empty if it does not contain any elements. -:::note +:::note Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting.
With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT empty(arr) FROM TABLE;` transforms to `SELECT arr.size0 = 0 FROM TABLE;`. ::: @@ -26,11 +26,11 @@ The function also works for [strings](string-functions.md#empty) or [UUID](uuid- **Arguments** -- `[x]` — Input array. [Array](../data-types/array.md). +- `[x]` — Input array. [Array](../data-types/array.md). **Returned value** -- Returns `1` for an empty array or `0` for a non-empty array. +- Returns `1` for an empty array or `0` for a non-empty array. Type: [UInt8](../data-types/int-uint.md). @@ -62,7 +62,7 @@ notEmpty([x]) An array is considered non-empty if it contains at least one element. -:::note +:::note Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT notEmpty(arr) FROM table` transforms to `SELECT arr.size0 != 0 FROM TABLE`. ::: @@ -70,11 +70,11 @@ The function also works for [strings](string-functions.md#notempty) or [UUID](uu **Arguments** -- `[x]` — Input array. [Array](../data-types/array.md). +- `[x]` — Input array. [Array](../data-types/array.md). **Returned value** -- Returns `1` for a non-empty array or `0` for an empty array. +- Returns `1` for a non-empty array or `0` for an empty array. Type: [UInt8](../data-types/int-uint.md). @@ -118,38 +118,41 @@ Accepts zero arguments and returns an empty array of the appropriate type. Accepts an empty array and returns a one-element array that is equal to the default value. - ## range(end), range(\[start, \] end \[, step\]) Returns an array of numbers from `start` to `end - 1` by `step`. The supported types are [UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64](../data-types/int-uint.md). **Syntax** + ``` sql range([start, ] end [, step]) ``` **Arguments** -- `start` — The first element of the array. Optional, required if `step` is used. Default value: 0. -- `end` — The number before which the array is constructed. Required. -- `step` — Determines the incremental step between each element in the array. Optional. Default value: 1. +- `start` — The first element of the array. Optional, required if `step` is used. Default value: 0. +- `end` — The number before which the array is constructed. Required. +- `step` — Determines the incremental step between each element in the array. Optional. Default value: 1. **Returned value** -- Array of numbers from `start` to `end - 1` by `step`. +- Array of numbers from `start` to `end - 1` by `step`. **Implementation details** -- All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments's. -- An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting. 
+- All arguments `start`, `end`, `step` must be of one of the following data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`. The same applies to the elements of the returned array, whose type is a supertype of the types of all arguments. +- An exception is thrown if the query results in arrays with a total length of more than the number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting. **Examples** Query: + ``` sql SELECT range(5), range(1, 5), range(1, 5, 2), range(-1, 5, 2); ``` + Result: + ```txt ┌─range(5)────┬─range(1, 5)─┬─range(1, 5, 2)─┬─range(-1, 5, 2)─┐ │ [0,1,2,3,4] │ [1,2,3,4] │ [1,3] │ [-1,1,3] │ @@ -176,7 +179,7 @@ arrayConcat(arrays) **Arguments** -- `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. +- `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. **Example** @@ -226,19 +229,19 @@ hasAll(set, subset) **Arguments** -- `set` – Array of any type with a set of elements. -- `subset` – Array of any type with elements that should be tested to be a subset of `set`. +- `set` – Array of any type with a set of elements. +- `subset` – Array of any type with elements that should be tested to be a subset of `set`. **Return values** -- `1`, if `set` contains all of the elements from `subset`. -- `0`, otherwise. +- `1`, if `set` contains all of the elements from `subset`. +- `0`, otherwise. **Peculiar properties** -- An empty array is a subset of any array. -- `Null` processed as a value. -- Order of values in both of arrays does not matter. +- An empty array is a subset of any array. +- `Null` is processed as a value. +- The order of values in both arrays does not matter. **Examples** @@ -264,18 +267,18 @@ hasAny(array1, array2) **Arguments** -- `array1` – Array of any type with a set of elements. -- `array2` – Array of any type with a set of elements. +- `array1` – Array of any type with a set of elements. +- `array2` – Array of any type with a set of elements. **Return values** -- `1`, if `array1` and `array2` have one similar element at least. -- `0`, otherwise. +- `1`, if `array1` and `array2` have at least one element in common. +- `0`, otherwise. **Peculiar properties** -- `Null` processed as a value. -- Order of values in both of arrays does not matter. +- `Null` is processed as a value. +- The order of values in both arrays does not matter. **Examples** @@ -301,24 +304,25 @@ In other words, the functions will check whether all the elements of `array2` are contained in `array1` like the `hasAll` function. In addition, it will check that the elements are observed in the same order in both `array1` and `array2`. For Example: + - `hasSubstr([1,2,3,4], [2,3])` returns 1. However, `hasSubstr([1,2,3,4], [3,2])` will return `0`. - `hasSubstr([1,2,3,4], [1,2,3])` returns 1. However, `hasSubstr([1,2,3,4], [1,2,4])` will return `0`. **Arguments** -- `array1` – Array of any type with a set of elements. -- `array2` – Array of any type with a set of elements. +- `array1` – Array of any type with a set of elements. +- `array2` – Array of any type with a set of elements. **Return values** -- `1`, if `array1` contains `array2`. -- `0`, otherwise. +- `1`, if `array1` contains `array2`. +- `0`, otherwise. **Peculiar properties** -- The function will return `1` if `array2` is empty. -- `Null` processed as a value. In other words `hasSubstr([1, 2, NULL, 3, 4], [2,3])` will return `0`.
However, `hasSubstr([1, 2, NULL, 3, 4], [2,NULL,3])` will return `1` -- Order of values in both of arrays does matter. +- The function will return `1` if `array2` is empty. +- `Null` is processed as a value. In other words, `hasSubstr([1, 2, NULL, 3, 4], [2,3])` will return `0`. However, `hasSubstr([1, 2, NULL, 3, 4], [2,NULL,3])` will return `1`. +- The order of values in both arrays does matter. **Examples** @@ -484,7 +488,7 @@ arrayPopBack(array) **Arguments** -- `array` – Array. +- `array` – Array. **Example** @@ -508,7 +512,7 @@ arrayPopFront(array) **Arguments** -- `array` – Array. +- `array` – Array. **Example** @@ -532,8 +536,8 @@ arrayPushBack(array, single_value) **Arguments** -- `array` – Array. -- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. +- `array` – Array. +- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. **Example** @@ -557,8 +561,8 @@ arrayPushFront(array, single_value) **Arguments** -- `array` – Array. -- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. +- `array` – Array. +- `single_value` – A single value. Only numbers can be added to an array with numbers, and only strings can be added to an array of strings. When adding numbers, ClickHouse automatically sets the `single_value` type for the data type of the array. For more information about the types of data in ClickHouse, see “[Data types](../../sql-reference/data-types/index.md#data_types)”. Can be `NULL`. The function adds a `NULL` element to an array, and the type of array elements converts to `Nullable`. **Example** @@ -582,11 +586,11 @@ arrayResize(array, size[, extender]) **Arguments:** -- `array` — Array. -- `size` — Required length of the array. - - If `size` is less than the original size of the array, the array is truncated from the right. -- If `size` is larger than the initial size of the array, the array is extended to the right with `extender` values or default values for the data type of the array items. -- `extender` — Value for extending an array. Can be `NULL`. +- `array` — Array. +- `size` — Required length of the array. + - If `size` is less than the original size of the array, the array is truncated from the right.
+- If `size` is larger than the initial size of the array, the array is extended to the right with `extender` values or default values for the data type of the array items. +- `extender` — Value for extending an array. Can be `NULL`. **Returned value:** @@ -642,7 +646,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res; Array elements set to `NULL` are handled as normal values. -## arraySort(\[func,\] arr, …) +## arraySort(\[func,\] arr, …) {#array_functions-sort} Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description. @@ -682,10 +686,10 @@ SELECT arraySort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]); └───────────────────────────────────────────────────────────┘ ``` -- `-Inf` values are first in the array. -- `NULL` values are last in the array. -- `NaN` values are right before `NULL`. -- `Inf` values are right before `NaN`. +- `-Inf` values are first in the array. +- `NULL` values are last in the array. +- `NaN` values are right before `NULL`. +- `Inf` values are right before `NaN`. Note that `arraySort` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. In this case, sorting order is determined by the result of the lambda function applied to the elements of the array. @@ -739,11 +743,15 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; └─────────┘ ``` -:::note +:::note To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.org/wiki/Schwartzian_transform) is used. ::: -## arrayReverseSort(\[func,\] arr, …) +## arrayPartialSort(\[func,\] limit, arr, …) + +Same as `arraySort` with an additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where the elements in the range `[1..limit]` are sorted in ascending order. The remaining elements `(limit..N]` are in an unspecified order. + +## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort} Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description. @@ -783,10 +791,10 @@ SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]) as res; └───────────────────────────────────────┘ ``` -- `Inf` values are first in the array. -- `NULL` values are last in the array. -- `NaN` values are right before `NULL`. -- `-Inf` values are right before `NaN`. +- `Inf` values are first in the array. +- `NULL` values are last in the array. +- `NaN` values are right before `NULL`. +- `-Inf` values are right before `NaN`. Note that the `arrayReverseSort` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. Example is shown below.
@@ -802,8 +810,8 @@ SELECT arrayReverseSort((x) -> -x, [1, 2, 3]) as res; The array is sorted in the following way: -1. At first, the source array (\[1, 2, 3\]) is sorted according to the result of the lambda function applied to the elements of the array. The result is an array \[3, 2, 1\]. -2. Array that is obtained on the previous step, is reversed. So, the final result is \[1, 2, 3\]. +1. At first, the source array (\[1, 2, 3\]) is sorted according to the result of the lambda function applied to the elements of the array. The result is an array \[3, 2, 1\]. +2. Array that is obtained on the previous step, is reversed. So, the final result is \[1, 2, 3\]. The lambda function can accept multiple arguments. In this case, you need to pass the `arrayReverseSort` function several arrays of identical length that the arguments of lambda function will correspond to. The resulting array will consist of elements from the first input array; elements from the next input array(s) specify the sorting keys. For example: @@ -819,8 +827,8 @@ SELECT arrayReverseSort((x, y) -> y, ['hello', 'world'], [2, 1]) as res; In this example, the array is sorted in the following way: -1. At first, the source array (\[‘hello’, ‘world’\]) is sorted according to the result of the lambda function applied to the elements of the arrays. The elements that are passed in the second array (\[2, 1\]), define the sorting keys for corresponding elements from the source array. The result is an array \[‘world’, ‘hello’\]. -2. Array that was sorted on the previous step, is reversed. So, the final result is \[‘hello’, ‘world’\]. +1. At first, the source array (\[‘hello’, ‘world’\]) is sorted according to the result of the lambda function applied to the elements of the arrays. The elements that are passed in the second array (\[2, 1\]), define the sorting keys for corresponding elements from the source array. The result is an array \[‘world’, ‘hello’\]. +2. Array that was sorted on the previous step, is reversed. So, the final result is \[‘hello’, ‘world’\]. Other examples are shown below. @@ -844,6 +852,10 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; └─────────┘ ``` +## arrayPartialReverseSort(\[func,\] limit, arr, …) + +Same as `arrayReverseSort` with an additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where the elements in the range `[1..limit]` are sorted in descending order. The remaining elements `(limit..N]` are in an unspecified order. + ## arrayUniq(arr, …) If one argument is passed, it counts the number of different elements in the array. @@ -857,7 +869,7 @@ A special function. See the section [“ArrayJoin function”](../../sql-referen ## arrayDifference -Calculates the difference between adjacent array elements. Returns an array where the first element will be 0, the second is the difference between `a[1] - a[0]`, etc. The type of elements in the resulting array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). +Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). **Syntax** @@ -867,11 +879,11 @@ arrayDifference(array) **Arguments** -- `array` – [Array](https://clickhouse.com/docs/en/data_types/array/).
+- `array` – [Array](https://clickhouse.com/docs/en/data_types/array/). **Returned values** -Returns an array of differences between adjacent elements. +Returns an array of differences between adjacent array elements. Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). @@ -919,7 +931,7 @@ arrayDistinct(array) **Arguments** -- `array` – [Array](https://clickhouse.com/docs/en/data_types/array/). +- `array` – [Array](https://clickhouse.com/docs/en/data_types/array/). **Returned values** @@ -987,8 +999,8 @@ arrayReduce(agg_func, arr1, arr2, ..., arrN) **Arguments** -- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). -- `arr` — Any number of [array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. +- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). +- `arr` — Any number of [array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. **Returned value** @@ -1052,13 +1064,13 @@ arrayReduceInRanges(agg_func, ranges, arr1, arr2, ..., arrN) **Arguments** -- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). -- `ranges` — The ranges to aggretate which should be an [array](../../sql-reference/data-types/array.md) of [tuples](../../sql-reference/data-types/tuple.md) which containing the index and the length of each range. -- `arr` — Any number of [Array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. +- `agg_func` — The name of an aggregate function which should be a constant [string](../../sql-reference/data-types/string.md). +- `ranges` — The ranges to aggregate which should be an [array](../../sql-reference/data-types/array.md) of [tuples](../../sql-reference/data-types/tuple.md), each containing the index and the length of a range. +- `arr` — Any number of [Array](../../sql-reference/data-types/array.md) type columns as the parameters of the aggregation function. **Returned value** -- Array containing results of the aggregate function over specified ranges. +- Array containing results of the aggregate function over specified ranges. Type: [Array](../../sql-reference/data-types/array.md). @@ -1108,8 +1120,8 @@ Converts an array of arrays to a flat array. Function: -- Applies to any depth of nested arrays. -- Does not change arrays that are already flat. +- Applies to any depth of nested arrays. +- Does not change arrays that are already flat. The flattened array contains all the elements from all source arrays. **Syntax** @@ -1123,7 +1135,7 @@ Alias: `flatten`. **Arguments** -- `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. +- `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. **Examples** @@ -1185,13 +1197,13 @@ arrayZip(arr1, arr2, ..., arrN) **Arguments** -- `arrN` — [Array](../../sql-reference/data-types/array.md). +- `arrN` — [Array](../../sql-reference/data-types/array.md). The function can take any number of arrays of different types. All the input arrays must be of equal size.
**Returned value** -- Array with elements from the source arrays grouped into [tuples](../../sql-reference/data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. +- Array with elements from the source arrays grouped into [tuples](../../sql-reference/data-types/tuple.md). Data types in the tuple are the same as types of the input arrays and in the same order as arrays are passed. Type: [Array](../../sql-reference/data-types/array.md). @@ -1213,7 +1225,7 @@ Result: ## arrayAUC -Calculate AUC (Area Under the Curve, which is a concept in machine learning, see more details: https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve). +Calculate AUC (Area Under the Curve, which is a concept in machine learning, see more details: <https://en.wikipedia.org/wiki/Receiver_operating_characteristic#Area_under_the_curve>). **Syntax** @@ -1433,12 +1445,12 @@ arrayMin([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). **Returned value** -- The minimum of function values (or the array minimum). +- The minimum of function values (or the array minimum). Type: if `func` is specified, matches `func` return value type, else matches the array elements type. @@ -1488,12 +1500,12 @@ arrayMax([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). **Returned value** -- The maximum of function values (or the array maximum). +- The maximum of function values (or the array maximum). Type: if `func` is specified, matches `func` return value type, else matches the array elements type. @@ -1543,12 +1555,12 @@ arraySum([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). **Returned value** -- The sum of the function values (or the array sum). +- The sum of the function values (or the array sum). Type: for decimal numbers in source array (or for converted values, if `func` is specified) — [Decimal128](../../sql-reference/data-types/decimal.md), for floating point numbers — [Float64](../../sql-reference/data-types/float.md), for numeric unsigned — [UInt64](../../sql-reference/data-types/int-uint.md), and for numeric signed — [Int64](../../sql-reference/data-types/int-uint.md). @@ -1598,12 +1610,12 @@ arrayAvg([func,] arr) **Arguments** -- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). -- `arr` — Array. [Array](../../sql-reference/data-types/array.md). +- `func` — Function. [Expression](../../sql-reference/data-types/special-data-types/expression.md). +- `arr` — Array. [Array](../../sql-reference/data-types/array.md). **Returned value** -- The average of function values (or the array average).
+- The average of function values (or the array average). Type: [Float64](../../sql-reference/data-types/float.md). @@ -1639,7 +1651,23 @@ Result: ## arrayCumSum(\[func,\] arr1, …) -Returns an array of partial sums of elements in the source array (a running sum). If the `func` function is specified, then the values of the array elements are converted by `func(arr1[i], …, arrN[i])` before summing. +Returns an array of the partial (running) sums of the elements in the source array `arr1`. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], …, arrN[i])`. + +**Syntax** + +``` sql +arrayCumSum(arr) +``` + +**Arguments** + +- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. + +**Returned value** + +- Returns an array of the partial sums of the elements in the source array. + +Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). Example: @@ -1655,9 +1683,25 @@ SELECT arrayCumSum([1, 1, 1, 1]) AS res Note that the `arrayCumSum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. -## arrayCumSumNonNegative(arr) +## arrayCumSumNonNegative(\[func,\] arr1, …) -Same as `arrayCumSum`, returns an array of partial sums of elements in the source array (a running sum). Different `arrayCumSum`, when then returned value contains a value less than zero, the value is replace with zero and the subsequent calculation is performed with zero parameters. For example: +Same as `arrayCumSum`, returns an array of the partial (running) sums of the elements in the source array. If `func` is specified, then the sum is computed from applying `func` to `arr1`, `arr2`, ..., `arrN`, i.e. `func(arr1[i], …, arrN[i])`. Unlike `arrayCumSum`, if the current running sum is smaller than `0`, it is replaced by `0`. + +**Syntax** + +``` sql +arrayCumSumNonNegative(arr) +``` + +**Arguments** + +- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. + +**Returned value** + +- Returns an array of non-negative partial sums of elements in the source array. + +Type: [UInt\*](https://clickhouse.com/docs/en/data_types/int_uint/#uint-ranges), [Int\*](https://clickhouse.com/docs/en/data_types/int_uint/#int-ranges), [Float\*](https://clickhouse.com/docs/en/data_types/float/). ``` sql SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res ``` ```text ┌─res───────┐ │ [1,2,0,1] │ └───────────┘ ``` + Note that the `arrayCumSumNonNegative` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. ## arrayProduct Multiplies elements of an array. **Syntax** ``` sql arrayProduct(arr) ``` **Arguments** -- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. +- `arr` — [Array](../../sql-reference/data-types/array.md) of numeric values. **Returned value** -- A product of array's elements. +- A product of the array's elements. Type: [Float64](../../sql-reference/data-types/float.md).
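Since the `arrayPartialSort`/`arrayPartialReverseSort` entries added earlier in this file ship without examples, here is a hedged usage sketch (the literal values are illustrative, not taken from the diff):

```sql
-- Only the first `limit` = 2 positions are guaranteed to be sorted.
SELECT arrayPartialSort(2, [5, 9, 1, 3]) AS res;
-- res starts with the two smallest elements in ascending order, e.g. [1,3,9,5];
-- the tail after position 2 is in an unspecified order.
```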
diff --git a/docs/en/sql-reference/functions/array-join.md b/docs/en/sql-reference/functions/array-join.md index d6256ba2dc5..14968eb1092 100644 --- a/docs/en/sql-reference/functions/array-join.md +++ b/docs/en/sql-reference/functions/array-join.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/array-join -sidebar_position: 61 +sidebar_position: 15 sidebar_label: arrayJoin --- diff --git a/docs/en/sql-reference/functions/bit-functions.md b/docs/en/sql-reference/functions/bit-functions.md index 1648ce35056..e754aa297c0 100644 --- a/docs/en/sql-reference/functions/bit-functions.md +++ b/docs/en/sql-reference/functions/bit-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/bit-functions -sidebar_position: 48 +sidebar_position: 20 sidebar_label: Bit --- @@ -34,12 +34,12 @@ bitShiftLeft(a, b) **Arguments** -- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed. +- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed. **Returned value** -- Shifted value. +- Shifted value. The type of the returned value is the same as the type of the input value. @@ -81,12 +81,12 @@ bitShiftRight(a, b) **Arguments** -- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed. +- `a` — A value to shift. [Integer types](../../sql-reference/data-types/int-uint.md), [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `b` — The number of shift positions. [Unsigned integer types](../../sql-reference/data-types/int-uint.md), 64 bit types or less are allowed. **Returned value** -- Shifted value. +- Shifted value. The type of the returned value is the same as the type of the input value. @@ -179,8 +179,8 @@ SELECT bitTest(number, index) **Arguments** -- `number` – Integer number. -- `index` – Position of bit. +- `number` – Integer number. +- `index` – Position of bit. **Returned values** @@ -244,8 +244,8 @@ SELECT bitTestAll(number, index1, index2, index3, index4, ...) **Arguments** -- `number` – Integer number. -- `index1`, `index2`, `index3`, `index4` – Positions of bit. For example, for set of positions (`index1`, `index2`, `index3`, `index4`) is true if and only if all of its positions are true (`index1` ⋀ `index2`, ⋀ `index3` ⋀ `index4`). +- `number` – Integer number. +- `index1`, `index2`, `index3`, `index4` – Positions of bit. For example, the set of positions (`index1`, `index2`, `index3`, `index4`) is true if and only if all of its positions are true (`index1` ⋀ `index2` ⋀ `index3` ⋀ `index4`). **Returned values** @@ -309,8 +309,8 @@ SELECT bitTestAny(number, index1, index2, index3, index4, ...) **Arguments** -- `number` – Integer number.
-- `index1`, `index2`, `index3`, `index4` – Positions of bit. +- `number` – Integer number. +- `index1`, `index2`, `index3`, `index4` – Positions of bit. **Returned values** @@ -364,11 +364,11 @@ bitCount(x) **Arguments** -- `x` — [Integer](../../sql-reference/data-types/int-uint.md) or [floating-point](../../sql-reference/data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. +- `x` — [Integer](../../sql-reference/data-types/int-uint.md) or [floating-point](../../sql-reference/data-types/float.md) number. The function uses the value representation in memory. It allows supporting floating-point numbers. **Returned value** -- Number of bits set to one in the input number. +- Number of bits set to one in the input number. The function does not convert input value to a larger type ([sign extension](https://en.wikipedia.org/wiki/Sign_extension)). So, for example, `bitCount(toUInt8(-1)) = 8`. @@ -404,12 +404,12 @@ bitHammingDistance(int1, int2) **Arguments** -- `int1` — First integer value. [Int64](../../sql-reference/data-types/int-uint.md). -- `int2` — Second integer value. [Int64](../../sql-reference/data-types/int-uint.md). +- `int1` — First integer value. [Int64](../../sql-reference/data-types/int-uint.md). +- `int2` — Second integer value. [Int64](../../sql-reference/data-types/int-uint.md). **Returned value** -- The Hamming distance. +- The Hamming distance. Type: [UInt8](../../sql-reference/data-types/int-uint.md). diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index 9751de8abed..1a175d5ffbc 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -1,22 +1,18 @@ --- slug: /en/sql-reference/functions/bitmap-functions -sidebar_position: 49 +sidebar_position: 25 sidebar_label: Bitmap --- # Bitmap Functions -Bitmap functions work for two bitmaps Object value calculation, it is to return new bitmap or cardinality while using formula calculation, such as and, or, xor, and not, etc. - -There are 2 kinds of construction methods for Bitmap Object. One is to be constructed by aggregation function groupBitmap with -State, the other is to be constructed by Array Object. It is also to convert Bitmap Object to Array Object. - -RoaringBitmap is wrapped into a data structure while actual storage of Bitmap objects. When the cardinality is less than or equal to 32, it uses Set objet. When the cardinality is greater than 32, it uses RoaringBitmap object. That is why storage of low cardinality set is faster. - -For more information on RoaringBitmap, see: [CRoaring](https://github.com/RoaringBitmap/CRoaring). +Bitmaps can be constructed in two ways. The first way is by the aggregation function groupBitmap with `-State`, the other way is to construct a bitmap from an Array object. ## bitmapBuild -Build a bitmap from unsigned integer array. +Builds a bitmap from an unsigned integer array. + +**Syntax** ``` sql bitmapBuild(array) ``` **Arguments** -- `array` – Unsigned integer array. +- `array` – Unsigned integer array. **Example** @@ -40,7 +36,9 @@ SELECT bitmapBuild([1, 2, 3, 4, 5]) AS res, toTypeName(res); ## bitmapToArray -Convert bitmap to integer array. +Converts a bitmap to an integer array. + +**Syntax** ``` sql bitmapToArray(bitmap) ``` **Arguments** -- `bitmap` – Bitmap object. +- `bitmap` – Bitmap object.
**Example** @@ -56,6 +54,8 @@ bitmapToArray(bitmap) SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res; ``` +Result: + ``` text ┌─res─────────┐ │ [1,2,3,4,5] │ @@ -64,7 +64,9 @@ SELECT bitmapToArray(bitmapBuild([1, 2, 3, 4, 5])) AS res; ## bitmapSubsetInRange -Return subset in specified range (not include the range_end). +Returns the subset of a bitmap with bits within a value interval. + +**Syntax** ``` sql bitmapSubsetInRange(bitmap, range_start, range_end) @@ -72,9 +74,9 @@ bitmapSubsetInRange(bitmap, range_start, range_end) **Arguments** -- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `range_start` – Range start point. Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `range_end` – Range end point (excluded). Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). +- `range_start` – Start of the range (inclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `range_end` – End of the range (exclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). **Example** @@ -82,6 +84,8 @@ bitmapSubsetInRange(bitmap, range_start, range_end) SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res; ``` +Result: + ``` text ┌─res───────────────┐ │ [30,31,32,33,100] │ @@ -90,7 +94,7 @@ SELECT bitmapToArray(bitmapSubsetInRange(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11, ## bitmapSubsetLimit -Creates a subset of bitmap with n elements taken between `range_start` and `cardinality_limit`. +Returns a subset of a bitmap with smallest bit value `range_start` and at most `cardinality_limit` elements. **Syntax** @@ -100,20 +104,12 @@ bitmapSubsetLimit(bitmap, range_start, cardinality_limit) **Arguments** -- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). -- `range_start` – The subset starting point. Type: [UInt32](../../sql-reference/data-types/int-uint.md). -- `cardinality_limit` – The subset cardinality upper limit. Type: [UInt32](../../sql-reference/data-types/int-uint.md). - -**Returned value** - -The subset. - -Type: [Bitmap object](#bitmap_functions-bitmapbuild). +- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). +- `range_start` – Start of the range (inclusive). Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `cardinality_limit` – Maximum cardinality of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT bitmapToArray(bitmapSubsetLimit(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(30), toUInt32(200))) AS res; ``` @@ -128,7 +124,7 @@ Result: ## subBitmap -Returns the bitmap elements, starting from the `offset` position. The number of returned elements is limited by the `cardinality_limit` parameter. Analog of the [substring](string-functions.md#substring)) string function, but for bitmap. +Returns a subset of the bitmap, starting from position `offset`. The maximum cardinality of the returned bitmap is `cardinality_limit`. **Syntax** @@ -138,20 +134,12 @@ subBitmap(bitmap, offset, cardinality_limit) **Arguments** -- `bitmap` – The bitmap. Type: [Bitmap object](#bitmap_functions-bitmapbuild). -- `offset` – The position of the first element of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). 
-- `cardinality_limit` – The maximum number of elements in the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). - -**Returned value** - -The subset. - -Type: [Bitmap object](#bitmap_functions-bitmapbuild). +- `bitmap` – The bitmap. Type: [Bitmap object](#bitmap_functions-bitmapbuild). +- `offset` – The position of the first element of the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `cardinality_limit` – The maximum number of elements in the subset. Type: [UInt32](../../sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT bitmapToArray(subBitmap(bitmapBuild([0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,100,200,500]), toUInt32(10), toUInt32(10))) AS res; ``` @@ -169,18 +157,18 @@ Result: Checks whether the bitmap contains an element. ``` sql -bitmapContains(haystack, needle) +bitmapContains(bitmap, needle) ``` **Arguments** -- `haystack` – [Bitmap object](#bitmap_functions-bitmapbuild), where the function searches. -- `needle` – Value that the function searches. Type: [UInt32](../../sql-reference/data-types/int-uint.md). +- `bitmap` – [Bitmap object](#bitmap_functions-bitmapbuild). +- `needle` – Searched bit value. Type: [UInt32](../../sql-reference/data-types/int-uint.md). **Returned values** -- 0 — If `haystack` does not contain `needle`. -- 1 — If `haystack` contains `needle`. +- 0 — If `bitmap` does not contain `needle`. +- 1 — If `bitmap` contains `needle`. Type: `UInt8`. @@ -190,6 +178,8 @@ Type: `UInt8`. SELECT bitmapContains(bitmapBuild([1,5,7,9]), toUInt32(9)) AS res; ``` +Result: + ``` text ┌─res─┐ │ 1 │ @@ -198,22 +188,25 @@ SELECT bitmapContains(bitmapBuild([1,5,7,9]), toUInt32(9)) AS res; ## bitmapHasAny -Checks whether two bitmaps have intersection by some elements. +Checks whether two bitmaps intersect. + +If `bitmap2` contains exactly one element, consider using [bitmapContains](#bitmap_functions-bitmapcontains) instead as it works more efficiently. + +**Syntax** ``` sql bitmapHasAny(bitmap1, bitmap2) ``` -If you are sure that `bitmap2` contains strictly one element, consider using the [bitmapContains](#bitmap_functions-bitmapcontains) function. It works more efficiently. - **Arguments** -- `bitmap*` – Bitmap object. +- `bitmap1` – Bitmap object 1. +- `bitmap2` – Bitmap object 2. **Return values** -- `1`, if `bitmap1` and `bitmap2` have one similar element at least. -- `0`, otherwise. +- `1`, if `bitmap1` and `bitmap2` have at least one shared element. +- `0`, otherwise. **Example** @@ -221,6 +214,8 @@ If you are sure that `bitmap2` contains strictly one element, consider using the SELECT bitmapHasAny(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res; ``` +Result: + ``` text ┌─res─┐ │ 1 │ @@ -229,16 +224,21 @@ SELECT bitmapHasAny(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res; ## bitmapHasAll -Analogous to `hasAll(array, array)` returns 1 if the first bitmap contains all the elements of the second one, 0 otherwise. -If the second argument is an empty bitmap then returns 1. +Returns 1 if the first bitmap contains all elements of the second bitmap, otherwise 0. +If the second bitmap is empty, returns 1. + +Also see `hasAll(array, array)`. + +**Syntax** ``` sql -bitmapHasAll(bitmap,bitmap) +bitmapHasAll(bitmap1, bitmap2) ``` **Arguments** -- `bitmap` – Bitmap object. +- `bitmap1` – Bitmap object 1. +- `bitmap2` – Bitmap object 2. 
**Example** @@ -246,6 +246,8 @@ bitmapHasAll(bitmap,bitmap) SELECT bitmapHasAll(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res; ``` +Result: + ``` text ┌─res─┐ │ 0 │ @@ -254,7 +256,9 @@ SELECT bitmapHasAll(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res; ## bitmapCardinality -Retrun bitmap cardinality of type UInt64. +Returns the cardinality of a bitmap. + +**Syntax** ``` sql bitmapCardinality(bitmap) @@ -262,7 +266,7 @@ bitmapCardinality(bitmap) **Arguments** -- `bitmap` – Bitmap object. +- `bitmap` – Bitmap object. **Example** @@ -270,6 +274,8 @@ bitmapCardinality(bitmap) SELECT bitmapCardinality(bitmapBuild([1, 2, 3, 4, 5])) AS res; ``` +Result: + ``` text ┌─res─┐ │ 5 │ @@ -278,13 +284,17 @@ SELECT bitmapCardinality(bitmapBuild([1, 2, 3, 4, 5])) AS res; ## bitmapMin -Retrun the smallest value of type UInt64 in the set, UINT32_MAX if the set is empty. +Computes the smallest bit set in a bitmap, or UINT32_MAX if the bitmap is empty. - bitmapMin(bitmap) +**Syntax** + +```sql +bitmapMin(bitmap) +``` **Arguments** -- `bitmap` – Bitmap object. +- `bitmap` – Bitmap object. **Example** @@ -292,6 +302,8 @@ Retrun the smallest value of type UInt64 in the set, UINT32_MAX if the set is em SELECT bitmapMin(bitmapBuild([1, 2, 3, 4, 5])) AS res; ``` +Result: + ``` text ┌─res─┐ │ 1 │ @@ -300,13 +312,17 @@ SELECT bitmapMin(bitmapBuild([1, 2, 3, 4, 5])) AS res; ## bitmapMax -Retrun the greatest value of type UInt64 in the set, 0 if the set is empty. +Computes the greatest bit set in a bitmap, or 0 if the bitmap is empty. - bitmapMax(bitmap) +**Syntax** + +```sql +bitmapMax(bitmap) +``` **Arguments** -- `bitmap` – Bitmap object. +- `bitmap` – Bitmap object. **Example** @@ -314,6 +330,8 @@ Retrun the greatest value of type UInt64 in the set, 0 if the set is empty. SELECT bitmapMax(bitmapBuild([1, 2, 3, 4, 5])) AS res; ``` +Result: + ``` text ┌─res─┐ │ 5 │ @@ -322,15 +340,21 @@ SELECT bitmapMax(bitmapBuild([1, 2, 3, 4, 5])) AS res; ## bitmapTransform -Transform an array of values in a bitmap to another array of values, the result is a new bitmap. +Replaces at most N bits in a bitmap. The old and new values of the i-th replaced bit are given by `from_array[i]` and `to_array[i]`. - bitmapTransform(bitmap, from_array, to_array) +The result depends on the array ordering of `from_array` and `to_array`. + +**Syntax** + +``` sql +bitmapTransform(bitmap, from_array, to_array) +``` **Arguments** -- `bitmap` – Bitmap object. -- `from_array` – UInt32 array. For idx in range \[0, from_array.size()), if bitmap contains from_array\[idx\], then replace it with to_array\[idx\]. Note that the result depends on array ordering if there are common elements between from_array and to_array. -- `to_array` – UInt32 array, its size shall be the same to from_array. +- `bitmap` – Bitmap object. +- `from_array` – UInt32 array. For idx in range \[0, from_array.size()), if bitmap contains from_array\[idx\], then replace it with to_array\[idx\]. +- `to_array` – UInt32 array with the same size as `from_array`. **Example** @@ -338,6 +362,8 @@ Transform an array of values in a bitmap to another array of values, the result SELECT bitmapToArray(bitmapTransform(bitmapBuild([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), cast([5,999,2] as Array(UInt32)), cast([2,888,20] as Array(UInt32)))) AS res; ``` +Result: + ``` text ┌─res───────────────────┐ │ [1,3,4,6,7,8,9,10,20] │ @@ -346,7 +372,9 @@ SELECT bitmapToArray(bitmapTransform(bitmapBuild([1, 2, 3, 4, 5, 6, 7, 8, 9, 10] ## bitmapAnd -Two bitmap and calculation, the result is a new bitmap.
+Computes the logical conjunction of two bitmaps. + +**Syntax** ``` sql bitmapAnd(bitmap,bitmap) @@ -354,7 +382,7 @@ bitmapAnd(bitmap,bitmap) **Arguments** -- `bitmap` – Bitmap object. +- `bitmap` – Bitmap object. **Example** @@ -362,6 +390,8 @@ bitmapAnd(bitmap,bitmap) SELECT bitmapToArray(bitmapAnd(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res; ``` +Result: + ``` text ┌─res─┐ │ [3] │ @@ -370,7 +400,9 @@ SELECT bitmapToArray(bitmapAnd(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS re ## bitmapOr -Two bitmap or calculation, the result is a new bitmap. +Computes the logical disjunction of two bitmaps. + +**Syntax** ``` sql bitmapOr(bitmap,bitmap) @@ -378,7 +410,7 @@ bitmapOr(bitmap,bitmap) **Arguments** -- `bitmap` – Bitmap object. +- `bitmap` – Bitmap object. **Example** @@ -386,6 +418,8 @@ bitmapOr(bitmap,bitmap) SELECT bitmapToArray(bitmapOr(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res; ``` +Result: + ``` text ┌─res─────────┐ │ [1,2,3,4,5] │ @@ -394,7 +428,9 @@ SELECT bitmapToArray(bitmapOr(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res ## bitmapXor -Two bitmap xor calculation, the result is a new bitmap. +Computes the symmetric difference (XOR) of two bitmaps. + +**Syntax** ``` sql bitmapXor(bitmap,bitmap) @@ -402,7 +438,7 @@ bitmapXor(bitmap,bitmap) **Arguments** -- `bitmap` – Bitmap object. +- `bitmap` – Bitmap object. **Example** @@ -410,6 +446,8 @@ bitmapXor(bitmap,bitmap) SELECT bitmapToArray(bitmapXor(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res; ``` +Result: + ``` text ┌─res───────┐ │ [1,2,4,5] │ @@ -418,7 +456,9 @@ SELECT bitmapToArray(bitmapXor(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS re ## bitmapAndnot -Two bitmap andnot calculation, the result is a new bitmap. +Computes the difference of two bitmaps: the result contains the bits of the first bitmap that are not set in the second bitmap (AND-NOT). + +**Syntax** ``` sql bitmapAndnot(bitmap,bitmap) @@ -426,7 +466,7 @@ bitmapAndnot(bitmap,bitmap) **Arguments** -- `bitmap` – Bitmap object. +- `bitmap` – Bitmap object. **Example** @@ -434,6 +474,8 @@ bitmapAndnot(bitmap,bitmap) SELECT bitmapToArray(bitmapAndnot(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS res; ``` +Result: + ``` text ┌─res───┐ │ [1,2] │ @@ -442,7 +484,9 @@ SELECT bitmapToArray(bitmapAndnot(bitmapBuild([1,2,3]),bitmapBuild([3,4,5]))) AS ## bitmapAndCardinality -Two bitmap and calculation, return cardinality of type UInt64. +Returns the cardinality of the logical conjunction of two bitmaps. + +**Syntax** ``` sql bitmapAndCardinality(bitmap,bitmap) @@ -450,7 +494,7 @@ bitmapAndCardinality(bitmap,bitmap) **Arguments** -- `bitmap` – Bitmap object. +- `bitmap` – Bitmap object. **Example** @@ -458,6 +502,8 @@ bitmapAndCardinality(bitmap,bitmap) SELECT bitmapAndCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res; ``` +Result: + ``` text ┌─res─┐ │ 1 │ @@ -466,7 +512,7 @@ SELECT bitmapAndCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res; ## bitmapOrCardinality -Two bitmap or calculation, return cardinality of type UInt64. +Returns the cardinality of the logical disjunction of two bitmaps. ``` sql bitmapOrCardinality(bitmap,bitmap) @@ -474,7 +520,7 @@ bitmapOrCardinality(bitmap,bitmap) **Arguments** -- `bitmap` – Bitmap object. +- `bitmap` – Bitmap object. **Example** @@ -482,6 +528,8 @@ bitmapOrCardinality(bitmap,bitmap) SELECT bitmapOrCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res; ``` +Result: + ``` text ┌─res─┐ │ 5 │ @@ -490,7 +538,7 @@ SELECT bitmapOrCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res; ## bitmapXorCardinality -Two bitmap xor calculation, return cardinality of type UInt64.
+Returns the cardinality of the XOR of two bitmaps. ``` sql bitmapXorCardinality(bitmap,bitmap) @@ -498,7 +546,7 @@ bitmapXorCardinality(bitmap,bitmap) **Arguments** -- `bitmap` – Bitmap object. +- `bitmap` – Bitmap object. **Example** @@ -506,6 +554,8 @@ bitmapXorCardinality(bitmap,bitmap) SELECT bitmapXorCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res; ``` +Result: + ``` text ┌─res─┐ │ 4 │ @@ -514,7 +564,7 @@ SELECT bitmapXorCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res; ## bitmapAndnotCardinality -Two bitmap andnot calculation, return cardinality of type UInt64. +Returns the cardinality of the AND-NOT operation of two bitmaps. ``` sql bitmapAndnotCardinality(bitmap,bitmap) @@ -522,7 +572,7 @@ bitmapAndnotCardinality(bitmap,bitmap) **Arguments** -- `bitmap` – Bitmap object. +- `bitmap` – Bitmap object. **Example** @@ -530,6 +580,8 @@ bitmapAndnotCardinality(bitmap,bitmap) SELECT bitmapAndnotCardinality(bitmapBuild([1,2,3]),bitmapBuild([3,4,5])) AS res; ``` +Result: + ``` text ┌─res─┐ │ 2 │ diff --git a/docs/en/sql-reference/functions/comparison-functions.md b/docs/en/sql-reference/functions/comparison-functions.md index 586c0dc54e6..b2c8f41e737 100644 --- a/docs/en/sql-reference/functions/comparison-functions.md +++ b/docs/en/sql-reference/functions/comparison-functions.md @@ -1,34 +1,89 @@ --- slug: /en/sql-reference/functions/comparison-functions -sidebar_position: 36 +sidebar_position: 35 sidebar_label: Comparison --- # Comparison Functions -Comparison functions always return 0 or 1 (Uint8). +The comparison functions below return 0 or 1 as UInt8. The following types can be compared: +- numbers +- strings and fixed strings +- dates +- dates with times -- numbers -- strings and fixed strings -- dates -- dates with times +Only values within the same group can be compared (e.g. UInt16 and UInt64) but not across groups (e.g. UInt16 and DateTime). -within each group, but not between different groups. +Strings are compared byte-by-byte. Note that this may lead to unexpected results if one of the strings contains UTF-8 encoded multi-byte characters. -For example, you can’t compare a date with a string. You have to use a function to convert the string to a date, or vice versa. +A string S1 which has another string S2 as prefix is considered greater than S2. -Strings are compared by bytes. A shorter string is smaller than all strings that start with it and that contain at least one more character.
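+For example, because strings compare byte-by-byte, the upper-case ASCII letter 'Z' (byte 0x5A) compares less than the lower-case letter 'a' (byte 0x61), and a string compares less than any string it is a prefix of. An illustrative query (the column aliases are arbitrary): + +``` sql +SELECT 'Z' < 'a' AS upper_lt_lower, 'abc' < 'abcd' AS prefix_lt_longer; +``` + +Result: + +``` text +┌─upper_lt_lower─┬─prefix_lt_longer─┐ +│              1 │                1 │ +└────────────────┴──────────────────┘ +```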
+## equals -### equals, a `=` b and a `==` b operator +**Syntax** -### notEquals, a `!=` b and a `<>` b operator +```sql +equals(a, b) +``` -### less, `<` operator +Alias: +- `a = b` (operator) +- `a == b` (operator) -### greater, `>` operator +## notEquals -### lessOrEquals, `<=` operator +**Syntax** -### greaterOrEquals, `>=` operator +```sql +notEquals(a, b) +``` + +Alias: +- `a != b` (operator) +- `a <> b` (operator) + +## less + +**Syntax** + +```sql +less(a, b) +``` + +Alias: +- `a < b` (operator) + +## greater + +**Syntax** + +```sql +greater(a, b) +``` + +Alias: +- `a > b` (operator) + +## lessOrEquals + +**Syntax** + +```sql +lessOrEquals(a, b) +``` + +Alias: +- `a <= b` (operator) + +## greaterOrEquals + +**Syntax** + +```sql +greaterOrEquals(a, b) +``` + +Alias: +- `a >= b` (operator) diff --git a/docs/en/sql-reference/functions/conditional-functions.md b/docs/en/sql-reference/functions/conditional-functions.md index ff1ac237025..eb86a6e551a 100644 --- a/docs/en/sql-reference/functions/conditional-functions.md +++ b/docs/en/sql-reference/functions/conditional-functions.md @@ -1,39 +1,40 @@ --- slug: /en/sql-reference/functions/conditional-functions -sidebar_position: 43 -sidebar_label: 'Conditional ' +sidebar_position: 40 +sidebar_label: Conditional --- # Conditional Functions ## if -Controls conditional branching. Unlike most systems, ClickHouse always evaluate both expressions `then` and `else`. +Performs conditional branching. + +If the condition `cond` evaluates to a non-zero value, the function returns the result of the expression `then`. If `cond` evaluates to zero or `NULL`, then the result of the `else` expression is returned. + +Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, the `then` expression is evaluated only on rows where `cond` is `true` and the `else` expression where `cond` is `false`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT if(number = 0, 0, intDiv(42, number)) FROM numbers(10)`. + +`then` and `else` must be of a similar type. **Syntax** ``` sql if(cond, then, else) ``` - -If the condition `cond` evaluates to a non-zero value, returns the result of the expression `then`, and the result of the expression `else`, if present, is skipped. If the `cond` is zero or `NULL`, then the result of the `then` expression is skipped and the result of the `else` expression, if present, is returned. - -You can use the [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) setting to calculate the `if` function according to a short scheme. If this setting is enabled, `then` expression is evaluated only on rows where `cond` is true, `else` expression – where `cond` is false. For example, an exception about division by zero is not thrown when executing the query `SELECT if(number = 0, 0, intDiv(42, number)) FROM numbers(10)`, because `intDiv(42, number)` will be evaluated only for numbers that doesn't satisfy condition `number = 0`. +Alias: `cond ? then : else` (ternary operator) **Arguments** -- `cond` – The condition for evaluation that can be zero or not. The type is UInt8, Nullable(UInt8) or NULL. -- `then` – The expression to return if condition is met. -- `else` – The expression to return if condition is not met. +- `cond` – The evaluated condition. UInt8, Nullable(UInt8) or NULL.
+- `then` – The expression returned if `cond` is true. +- `else` – The expression returned if `cond` is false or `NULL`. **Returned values** -The function executes `then` and `else` expressions and returns its result, depending on whether the condition `cond` ended up being zero or not. +The result of either the `then` or the `else` expression, depending on the condition `cond`. **Example** -Query: - ``` sql SELECT if(1, plus(2, 2), plus(2, 6)); ``` @@ -46,30 +47,34 @@ Result: └────────────┘ ``` -Query: +## multiIf + +Allows writing the [CASE](../../sql-reference/operators/index.md#operator_case) operator more compactly in the query. + +**Syntax** ``` sql -SELECT if(0, plus(2, 2), plus(2, 6)); +multiIf(cond_1, then_1, cond_2, then_2, ..., else) ``` -Result: +Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, the `then_i` expression is evaluated only on rows where `((NOT cond_1) AND (NOT cond_2) AND ... AND (NOT cond_{i-1}) AND cond_i)` is `true`, and `cond_i` is evaluated only on rows where `((NOT cond_1) AND (NOT cond_2) AND ... AND (NOT cond_{i-1}))` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT multiIf(number = 2, intDiv(1, number), number = 5) FROM numbers(10)`. + +**Arguments** + +The function accepts `2N+1` parameters: +- `cond_N` — The N-th evaluated condition which controls if `then_N` is returned. +- `then_N` — The result of the function when `cond_N` is true. +- `else` — The result of the function if none of the conditions is true. + +**Returned values** + +The result of any of the `then_N` or `else` expressions, depending on the conditions `cond_N`. + +**Example** + +Assuming this table: ``` text -┌─plus(2, 6)─┐ -│ 8 │ -└────────────┘ -``` - -- `then` and `else` must have the lowest common type. - -**Example:** - -Take this `LEFT_RIGHT` table: - -``` sql -SELECT * -FROM LEFT_RIGHT - ┌─left─┬─right─┐ │ ᴺᵁᴸᴸ │ 4 │ │ 1 │ 3 │ @@ -79,69 +84,6 @@ FROM LEFT_RIGHT └──────┴───────┘ ```
- -**Syntax** - -``` sql -multiIf(cond_1, then_1, cond_2, then_2, ..., else) -``` - -You can use the [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) setting to calculate the `multiIf` function according to a short scheme. If this setting is enabled, `then_i` expression is evaluated only on rows where `((NOT cond_1) AND (NOT cond_2) AND ... AND (NOT cond_{i-1}) AND cond_i)` is true, `cond_i` will be evaluated only on rows where `((NOT cond_1) AND (NOT cond_2) AND ... AND (NOT cond_{i-1}))` is true. For example, an exception about division by zero is not thrown when executing the query `SELECT multiIf(number = 2, intDiv(1, number), number = 5) FROM numbers(10)`. - -**Arguments** - -- `cond_N` — The condition for the function to return `then_N`. -- `then_N` — The result of the function when executed. -- `else` — The result of the function if none of the conditions is met. - -The function accepts `2N+1` parameters. - -**Returned values** - -The function returns one of the values `then_N` or `else`, depending on the conditions `cond_N`. - -**Example** - -Again using `LEFT_RIGHT` table. - ``` sql SELECT left, diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 71b7fa07f18..15644b54c2b 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1,14 +1,14 @@ --- slug: /en/sql-reference/functions/date-time-functions -sidebar_position: 39 +sidebar_position: 45 sidebar_label: Dates and Times --- # Functions for Working with Dates and Times -Support for time zones. +Most functions in this section accept an optional time zone argument, e.g. `Europe/Amsterdam`. In this case, the time zone is the specified one instead of the local (default) one. -All functions for working with the date and time that have a logical use for the time zone can accept a second optional time zone argument. Example: Asia/Yekaterinburg. In this case, they use the specified time zone instead of the local (default) one. +**Example** ``` sql SELECT @@ -24,14 +24,98 @@ SELECT └─────────────────────┴────────────┴────────────┴─────────────────────┘ ``` -## timeZone +## makeDate -Returns the timezone of the server. -If it is executed in the context of a distributed table, then it generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +Creates a [Date](../../sql-reference/data-types/date.md) from a year, month and day argument. **Syntax** ``` sql +makeDate(year, month, day) +``` + +**Arguments** + +- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +**Returned value** + +- A date created from the arguments. + +Type: [Date](../../sql-reference/data-types/date.md). 
+ +**Example** + +``` sql +SELECT makeDate(2023, 2, 28) AS Date; +``` + +Result: + +``` text +┌───────Date─┐ +│ 2023-02-28 │ +└────────────┘ +``` + +## makeDate32 + +Like [makeDate](#makedate) but produces a [Date32](../../sql-reference/data-types/date32.md). + +## makeDateTime + +Creates a [DateTime](../../sql-reference/data-types/datetime.md) from a year, month, day, hour, minute and second argument. + +**Syntax** + +``` sql +makeDateTime(year, month, day, hour, minute, second[, timezone]) +``` + +**Arguments** + +- `year` — Year. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `month` — Month. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `day` — Day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `hour` — Hour. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `minute` — Minute. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `second` — Second. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). + +**Returned value** + +- A date with time created from the arguments. + +Type: [DateTime](../../sql-reference/data-types/datetime.md). + +**Example** + +``` sql +SELECT makeDateTime(2023, 2, 28, 17, 12, 33) AS DateTime; +``` + +Result: + +``` text +┌────────────DateTime─┐ +│ 2023-02-28 17:12:33 │ +└─────────────────────┘ +``` + +## makeDateTime64 + +Like [makeDateTime](#makedatetime) but produces a [DateTime64](../../sql-reference/data-types/datetime64.md). + +## timeZone + +Returns the timezone of the server. +If the function is executed in the context of a distributed table, it generates a normal column with values relevant to each shard, otherwise it produces a constant value. + +**Syntax** + +```sql timeZone() ``` @@ -39,13 +123,13 @@ Alias: `timezone`. **Returned value** -- Timezone. +- Timezone. Type: [String](../../sql-reference/data-types/string.md). ## toTimeZone -Converts time or date and time to the specified time zone. The time zone is an attribute of the `Date` and `DateTime` data types. The internal value (number of seconds) of the table field or of the resultset's column does not change, the column's type changes and its string representation changes accordingly. +Converts a date or date with time to the specified time zone. Does not change the internal value (number of unix seconds) of the data, only the value's time zone attribute and the value's string representation change. **Syntax** ``` sql toTimeZone(value, timezone) ``` @@ -57,19 +141,17 @@ Alias: `toTimezone`. **Arguments** -- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md).
This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types). +- `value` — Time or date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — Timezone for the returned value. [String](../../sql-reference/data-types/string.md). This argument is a constant, because `toTimezone` changes the timezone of a column (timezone is an attribute of `DateTime*` types). **Returned value** -- Date and time. +- Date and time. Type: [DateTime](../../sql-reference/data-types/datetime.md). **Example** -Query: - ```sql SELECT toDateTime('2019-01-01 00:00:00', 'UTC') AS time_utc, toTypeName(time_utc) AS type_utc, @@ -99,8 +181,6 @@ type_samoa: DateTime('US/Samoa') int32samoa: 1546300800 ``` -`toTimeZone(time_utc, 'Asia/Yekaterinburg')` changes the `DateTime('UTC')` type to `DateTime('Asia/Yekaterinburg')`. The value (Unixtimestamp) 1546300800 stays the same, but the string representation (the result of the toString() function) changes from `time_utc: 2019-01-01 00:00:00` to `time_yekat: 2019-01-01 05:00:00`. - ## timeZoneOf Returns the timezone name of [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md) data types. **Syntax** ``` sql timeZoneOf(value) ``` Alias: `timezoneOf`. **Arguments** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Returned value** -- Timezone name. +- Timezone name. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: ``` sql SELECT timezoneOf(now()); ``` @@ -139,8 +218,9 @@ Result: ## timeZoneOffset -Returns a timezone offset in seconds from [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time). The function takes into account [daylight saving time](https://en.wikipedia.org/wiki/Daylight_saving_time) and historical timezone changes at the specified date and time. -[IANA timezone database](https://www.iana.org/time-zones) is used to calculate the offset. +Returns the timezone offset in seconds from [UTC](https://en.wikipedia.org/wiki/Coordinated_Universal_Time). +The function takes [daylight saving time](https://en.wikipedia.org/wiki/Daylight_saving_time) and historical timezone changes at the specified date and time into account. +The [IANA timezone database](https://www.iana.org/time-zones) is used to calculate the offset. **Syntax** ``` sql timeZoneOffset(value) ``` Alias: `timezoneOffset`. **Arguments** -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Returned value** -- Offset from UTC in seconds. +- Offset from UTC in seconds. Type: [Int32](../../sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT toDateTime('2021-04-21 10:20:30', 'America/New_York') AS Time, toTypeName(Time) AS Type, timeZoneOffset(Time) AS Offset_in_seconds, (Offset_in_seconds / 3600) AS Offset_in_hours; @@ -179,37 +257,37 @@ Result: ## toYear -Converts a date or date with time to a UInt16 number containing the year number (AD). +Converts a date or date with time to the year number (AD) as UInt16 value. Alias: `YEAR`.
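+A minimal illustration for `toYear` (the input value and the alias are arbitrary): + +``` sql +SELECT toYear(toDateTime('2023-04-21 10:20:30')) AS year; +``` + +Result: + +``` text +┌─year─┐ +│ 2023 │ +└──────┘ +```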
## toQuarter -Converts a date or date with time to a UInt8 number containing the quarter number. +Converts a date or date with time to the quarter number as UInt8 value. Alias: `QUARTER`. ## toMonth -Converts a date or date with time to a UInt8 number containing the month number (1-12). +Converts a date or date with time to the month number (1-12) as UInt8 value. Alias: `MONTH`. ## toDayOfYear -Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366). +Converts a date or date with time to the number of the day of the year (1-366) as UInt16 value. Alias: `DAYOFYEAR`. ## toDayOfMonth -Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31). +Converts a date or date with time to the number of the day in the month (1-31) as UInt8 value. Aliases: `DAYOFMONTH`, `DAY`. ## toDayOfWeek -Converts a date or date with time to a UInt8 number containing the number of the day of the week. +Converts a date or date with time to the number of the day in the week as UInt8 value. The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is omitted, the default mode is 0. The time zone of the date can be specified as the third argument. @@ -230,27 +308,28 @@ toDayOfWeek(t[, mode[, timezone]]) ## toHour -Converts a date with time to a UInt8 number containing the number of the hour in 24-hour time (0-23). -This function assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always true – even in Moscow the clocks were twice changed at a different time). +Converts a date with time to the number of the hour in 24-hour time (0-23) as UInt8 value. + +Assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always true – even in Moscow the clocks were twice changed at a different time). Alias: `HOUR`. ## toMinute -Converts a date with time to a UInt8 number containing the number of the minute of the hour (0-59). +Converts a date with time to the number of the minute of the hour (0-59) as UInt8 value. Alias: `MINUTE`. ## toSecond -Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59). -Leap seconds are not accounted for. +Converts a date with time to the second in the minute (0-59) as UInt8 value. Leap seconds are not considered. Alias: `SECOND`. ## toUnixTimestamp -For DateTime argument: converts value to the number with type UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time). +For DateTime arguments: converts the value to the number with type UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time). + For String argument: converts the input string to the datetime according to the timezone (optional second argument, server timezone is used by default) and returns the corresponding unix timestamp. **Syntax** ``` sql toUnixTimestamp(str, [timezone]) ``` **Returned value** -- Returns the unix timestamp. +- Returns the unix timestamp. Type: `UInt32`. **Example** -Query: - ``` sql SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp ``` @@ -367,12 +444,12 @@ toStartOfSecond(value, [timezone]) **Arguments** -- `value` — Date and time.
[DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). +- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). **Returned value** -- Input value without sub-seconds. +- Input value without sub-seconds. Type: [DateTime64](../../sql-reference/data-types/datetime64.md). @@ -410,7 +487,7 @@ Result: **See also** -- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter. +- [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) server configuration parameter. ## toStartOfFiveMinutes @@ -499,9 +576,9 @@ The following table describes how the mode argument works. For mode values with a meaning of “with 4 or more days this year,” weeks are numbered according to ISO 8601:1988: -- If the week containing January 1 has 4 or more days in the new year, it is week 1. +- If the week containing January 1 has 4 or more days in the new year, it is week 1. -- Otherwise, it is the last week of the previous year, and the next week is week 1. +- Otherwise, it is the last week of the previous year, and the next week is week 1. For mode values with a meaning of “contains January 1”, the week contains January 1 is week 1. It does not matter how many days in the new year the week contained, even if it contained only one day. @@ -513,9 +590,9 @@ toWeek(t[, mode[, time_zone]]) **Arguments** -- `t` – Date or DateTime. -- `mode` – Optional parameter, Range of values is \[0,9\], default is 0. -- `Timezone` – Optional parameter, it behaves like any other conversion function. +- `t` – Date or DateTime. +- `mode` – Optional parameter, range of values is \[0,9\], default is 0. +- `time_zone` – Optional parameter, it behaves like any other conversion function. **Example** @@ -570,7 +647,7 @@ age('unit', startdate, enddate, [timezone]) **Arguments** -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: - `second` (possible abbreviations: `ss`, `s`) - `minute` (possible abbreviations: `mi`, `n`) - `hour` (possible abbreviations: `hh`, `h`) - `day` (possible abbreviations: `dd`, `d`) - `week` (possible abbreviations: `wk`, `ww`) - `month` (possible abbreviations: `mm`, `m`) - `quarter` (possible abbreviations: `qq`, `q`) - `year` (possible abbreviations: `yyyy`, `yy`) -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
-- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). **Returned value** @@ -596,8 +673,6 @@ Type: [Int](../../sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00')); ``` @@ -610,8 +685,6 @@ Result: └───────────────────────────────────────────────────────────────────────────────────┘ ``` -Query: - ``` sql SELECT toDate('2022-01-01') AS e, @@ -645,11 +718,11 @@ For an alternative to `date\_diff`, see function `age`. date_diff('unit', startdate, enddate, [timezone]) ``` -Aliases: `dateDiff`, `DATE_DIFF`. +Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_DIFF`. **Arguments** -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: - `second` (possible abbreviations: `ss`, `s`) @@ -661,11 +734,11 @@ Aliases: `dateDiff`, `DATE_DIFF`. - `quarter` (possible abbreviations: `qq`, `q`) - `year` (possible abbreviations: `yyyy`, `yy`) -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). 
-- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). **Returned value** @@ -675,8 +748,6 @@ Type: [Int](../../sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); ``` @@ -689,8 +760,6 @@ Result: └────────────────────────────────────────────────────────────────────────────────────────┘ ``` -Query: - ``` sql SELECT toDate('2022-01-01') AS e, @@ -722,7 +791,7 @@ Alias: `dateTrunc`. **Arguments** -- `unit` — The type of interval to truncate the result. [String Literal](../syntax.md#syntax-string-literal). +- `unit` — The type of interval to truncate the result. [String Literal](../syntax.md#syntax-string-literal). Possible values: - `second` @@ -734,12 +803,12 @@ Alias: `dateTrunc`. - `quarter` - `year` -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). +- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). If not specified, the function uses the timezone of the `value` parameter. [String](../../sql-reference/data-types/string.md). **Returned value** -- Value, truncated to the specified part of date. +- Value, truncated to the specified part of date. Type: [DateTime](../../sql-reference/data-types/datetime.md). @@ -775,7 +844,7 @@ Result: **See Also** -- [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone) +- [toStartOfInterval](#tostartofintervaltime-or-data-interval-x-unit-time-zone) ## date\_add @@ -791,7 +860,7 @@ Aliases: `dateAdd`, `DATE_ADD`. **Arguments** -- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). Possible values: - `second` @@ -803,8 +872,8 @@ Aliases: `dateAdd`, `DATE_ADD`. - `quarter` - `year` -- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). 
+- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). **Returned value** @@ -814,8 +883,6 @@ Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-ref **Example** -Query: - ```sql SELECT date_add(YEAR, 3, toDate('2018-01-01')); ``` @@ -842,7 +909,7 @@ Aliases: `dateSub`, `DATE_SUB`. **Arguments** -- `unit` — The type of interval to subtract. Note: The unit should be unquoted. +- `unit` — The type of interval to subtract. Note: The unit should be unquoted. Possible values: @@ -855,8 +922,8 @@ Aliases: `dateSub`, `DATE_SUB`. - `quarter` - `year` -- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). +- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). **Returned value** @@ -866,8 +933,6 @@ Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-ref **Example** -Query: - ``` sql SELECT date_sub(YEAR, 3, toDate('2018-01-01')); ``` @@ -894,9 +959,9 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. **Arguments** -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). -- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). -- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). +- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). +- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). Possible values: - `second` @@ -916,8 +981,6 @@ Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-ref **Example** -Query: - ```sql select timestamp_add(toDate('2018-01-01'), INTERVAL 3 MONTH); ``` @@ -944,7 +1007,7 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. **Arguments** -- `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval to subtract. [String](../../sql-reference/data-types/string.md). Possible values: - `second` @@ -956,8 +1019,8 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. - `quarter` - `year` -- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). +- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). 
**Returned value** @@ -967,8 +1030,6 @@ Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-ref **Example** -Query: - ```sql select timestamp_sub(MONTH, 5, toDateTime('2018-12-18 01:02:03')); ``` @@ -993,11 +1054,11 @@ now([timezone]) **Arguments** -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). **Returned value** -- Current date and time. +- Current date and time. Type: [DateTime](../../sql-reference/data-types/datetime.md). @@ -1043,12 +1104,12 @@ now64([scale], [timezone]) **Arguments** -- `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. Typically are used - 3 (default) (milliseconds), 6 (microseconds), 9 (nanoseconds). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `scale` - Tick size (precision): 10^(-precision) seconds. Valid range: [ 0 : 9 ]. Typical values: 3 (default, milliseconds), 6 (microseconds), 9 (nanoseconds). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). **Returned value** -- Current date and time with sub-second precision. +- Current date and time with sub-second precision. Type: [DateTime64](../../sql-reference/data-types/datetime64.md). @@ -1080,11 +1141,11 @@ nowInBlock([timezone]) **Arguments** -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). **Returned value** -- Current date and time at the moment of processing of each block of data. +- Current date and time at the moment of processing of each block of data. Type: [DateTime](../../sql-reference/data-types/datetime.md). @@ -1264,7 +1325,7 @@ Using replacement fields, you can define a pattern for the resulting string. “ | %d | day of the month, zero-padded (01-31) | 02 | | %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 | | %e | day of the month, space-padded (1-31) |   2 | -| %f | fractional second from the fractional part of DateTime64 | 1234560 | +| %f | fractional second, see 'Note 1' below | 1234560 | | %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 | | %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 | | %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 | @@ -1276,16 +1337,16 @@ Using replacement fields, you can define a pattern for the resulting string.
“ | %k | hour in 24h format (00-23) | 22 | | %l | hour in 12h format (01-12) | 09 | | %m | month as an integer number (01-12) | 01 | -| %M | minute (00-59) | 33 | +| %M | full month name (January-December), see 'Note 2' below | January | | %n | new-line character (‘’) | | | %p | AM or PM designation | PM | | %Q | Quarter (1-4) | 1 | -| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%M %p | 10:30 PM | -| %R | 24-hour HH:MM time, equivalent to %H:%M | 22:33 | +| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%i %p | 10:30 PM | +| %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 | | %s | second (00-59) | 44 | | %S | second (00-59) | 44 | | %t | horizontal-tab character (’) | | -| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 | +| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S | 22:33:44 | | %u | ISO 8601 weekday as number with Monday as 1 (1-7) | 2 | | %V | ISO 8601 week number (01-53) | 01 | | %w | weekday as a integer number with Sunday as 0 (0-6) | 2 | @@ -1295,9 +1356,11 @@ Using replacement fields, you can define a pattern for the resulting string. “ | %z | Time offset from UTC as +HHMM or -HHMM | -0500 | | %% | a % sign | % | -**Example** +Note 1: In ClickHouse versions earlier than v23.4, `%f` prints a single zero (0) if the formatted value is a Date, Date32 or DateTime (which have no fractional seconds) or a DateTime64 with a precision of 0. The previous behavior can be restored using setting `formatdatetime_f_prints_single_zero = 1`. -Query: +Note 2: In ClickHouse versions earlier than v23.4, `%M` prints the minute (00-59) instead of the full month name (January-December). The previous behavior can be restored using setting `formatdatetime_parsedatetime_m_is_month_name = 0`. + +**Example** ``` sql SELECT formatDateTime(toDate('2010-01-04'), '%g') @@ -1311,8 +1374,6 @@ Result: └────────────────────────────────────────────┘ ``` -Query: - ``` sql SELECT formatDateTime(toDateTime64('2010-01-04 12:34:56.123456', 7), '%f') ``` @@ -1327,7 +1388,7 @@ Result: **See Also** -- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax) +- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax) ## formatDateTimeInJodaSyntax {#date_time_functions-formatDateTimeInJodaSyntax} @@ -1369,8 +1430,6 @@ Using replacement fields, you can define a pattern for the resulting string. **Example** -Query: - ``` sql SELECT formatDateTimeInJodaSyntax(toDateTime('2010-01-04 12:34:56'), 'yyyy-MM-dd HH:mm:ss') ``` @@ -1396,20 +1455,18 @@ dateName(date_part, date) **Arguments** -- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md). -- `date` — Date. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `timezone` — Timezone. Optional. [String](../../sql-reference/data-types/string.md). +- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md). +- `date` — Date. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `timezone` — Timezone. Optional. 
[String](../../sql-reference/data-types/string.md). **Returned value** -- The specified part of date. +- The specified part of date. Type: [String](../../sql-reference/data-types/string.md#string) **Example** -Query: - ```sql WITH toDateTime('2021-04-14 11:22:33') AS date_value SELECT @@ -1438,18 +1495,16 @@ monthName(date) **Arguments** -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). **Returned value** -- The name of the month. +- The name of the month. Type: [String](../../sql-reference/data-types/string.md#string) **Example** -Query: - ```sql WITH toDateTime('2021-04-14 11:22:33') AS date_value SELECT monthName(date_value); @@ -1473,8 +1528,6 @@ Alias: `FROM_UNIXTIME`. **Example:** -Query: - ```sql SELECT fromUnixTimestamp(423543535); ``` @@ -1503,7 +1556,7 @@ SELECT fromUnixTimestamp(1234334543, '%Y-%m-%d %R:%S') AS DateTime; **See Also** -- [fromUnixTimestampInJodaSyntax](##fromUnixTimestampInJodaSyntax) +- [fromUnixTimestampInJodaSyntax](##fromUnixTimestampInJodaSyntax) ## fromUnixTimestampInJodaSyntax @@ -1511,7 +1564,6 @@ Similar to fromUnixTimestamp, except that it formats time in Joda style instead **Example:** -Query: ``` sql SELECT fromUnixTimestampInJodaSyntax(1669804872, 'yyyy-MM-dd HH:mm:ss', 'UTC'); ``` @@ -1535,18 +1587,16 @@ toModifiedJulianDay(date) **Arguments** -- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). **Returned value** -- Modified Julian Day number. +- Modified Julian Day number. Type: [Int32](../../sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT toModifiedJulianDay('2020-01-01'); ``` @@ -1571,18 +1621,16 @@ toModifiedJulianDayOrNull(date) **Arguments** -- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `date` — Date in text form. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). **Returned value** -- Modified Julian Day number. +- Modified Julian Day number. Type: [Nullable(Int32)](../../sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT toModifiedJulianDayOrNull('2020-01-01'); ``` @@ -1607,18 +1655,16 @@ fromModifiedJulianDay(day) **Arguments** -- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). +- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). **Returned value** -- Date in text form. +- Date in text form. Type: [String](../../sql-reference/data-types/string.md) **Example** -Query: - ``` sql SELECT fromModifiedJulianDay(58849); ``` @@ -1643,18 +1689,16 @@ fromModifiedJulianDayOrNull(day) **Arguments** -- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). +- `day` — Modified Julian Day number. [Any integral types](../../sql-reference/data-types/int-uint.md). **Returned value** -- Date in text form. +- Date in text form. 
Type: [Nullable(String)](../../sql-reference/data-types/string.md) **Example** -Query: - ``` sql SELECT fromModifiedJulianDayOrNull(58849); ``` diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index 293e02f8a54..660af6912b0 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -1,7 +1,10 @@ --- slug: /en/sql-reference/functions/distance-functions +sidebar_position: 55 +sidebar_label: Distance --- -# Distance functions + +# Distance Functions ## L1Norm @@ -17,11 +20,11 @@ Alias: `normL1`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). **Returned value** -- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. +- L1-norm or [taxicab geometry](https://en.wikipedia.org/wiki/Taxicab_geometry) distance. Type: [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). @@ -55,11 +58,11 @@ Alias: `normL2`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). **Returned value** -- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). +- L2-norm or [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance). Type: [Float](../../sql-reference/data-types/float.md). @@ -93,11 +96,11 @@ Alias: `normLinf`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). **Returned value** -- Linf-norm or the maximum absolute value. +- Linf-norm or the maximum absolute value. Type: [Float](../../sql-reference/data-types/float.md). @@ -131,12 +134,12 @@ Alias: `normLp`. **Arguments** -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). +- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `p` — The power. Possible values: real number in `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). **Returned value** -- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm) +- [Lp-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm) Type: [Float](../../sql-reference/data-types/float.md). @@ -170,12 +173,12 @@ Alias: `distanceL1`. **Arguments** -- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). -- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). +- `vector2` — Second vector. 
[Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).

**Returned value**

-- 1-norm distance.
+- 1-norm distance.

Type: [Float](../../sql-reference/data-types/float.md).

@@ -209,12 +212,12 @@ Alias: `distanceL2`.

**Arguments**

-- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).

**Returned value**

-- 2-norm distance.
+- 2-norm distance.

Type: [Float](../../sql-reference/data-types/float.md).

@@ -248,12 +251,12 @@ Alias: `distanceLinf`.

**Arguments**

-- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `vector1` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).

**Returned value**

-- Infinity-norm distance.
+- Infinity-norm distance.

Type: [Float](../../sql-reference/data-types/float.md).

@@ -287,13 +290,13 @@ Alias: `distanceLp`.

**Arguments**

-- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `p` — The power. Possible values: real number from `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
+- `vector1` — First vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector2` — Second vector. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `p` — The power. Possible values: real number from `[1; inf)`. [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).

**Returned value**

-- p-norm distance.
+- p-norm distance.

Type: [Float](../../sql-reference/data-types/float.md).

@@ -328,11 +331,11 @@ Alias: `normalizeL1`.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).

**Returned value**

-- Unit vector.
+- Unit vector.

Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).

@@ -366,11 +369,11 @@ Alias: `normalizeL1`.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).

**Returned value**

-- Unit vector.
+- Unit vector.

Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).

@@ -404,11 +407,11 @@ Alias: `normalizeLinf `.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**

-- Unit vector.
+- Unit vector.

Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).

@@ -442,12 +445,12 @@ Alias: `normalizeLp `.

**Arguments**

-- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
-- `p` — The power. Possible values: any number from [1;inf). [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).
+- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md).
+- `p` — The power. Possible values: any number from [1;inf). [UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).

**Returned value**

-- Unit vector.
+- Unit vector.

Type: [Tuple](../../sql-reference/data-types/tuple.md) of [Float](../../sql-reference/data-types/float.md).

@@ -479,12 +482,12 @@ cosineDistance(vector1, vector2)

**Arguments**

-- `vector1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
-- `vector2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).
+- `vector2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md).

**Returned value**

-- Cosine of the angle between two vectors substracted from one.
+- Cosine of the angle between two vectors subtracted from one.

Type: [Float](../../sql-reference/data-types/float.md).

diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md
index cccc02c2553..618dd3f4b4f 100644
--- a/docs/en/sql-reference/functions/encoding-functions.md
+++ b/docs/en/sql-reference/functions/encoding-functions.md
@@ -1,6 +1,6 @@
---
slug: /en/sql-reference/functions/encoding-functions
-sidebar_position: 52
+sidebar_position: 65
sidebar_label: Encoding
---

@@ -18,11 +18,11 @@ char(number_1, [number_2, ..., number_n]);

**Arguments**

-- `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md).
+- `number_1, number_2, ..., number_n` — Numerical arguments interpreted as integers. Types: [Int](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md).

**Returned value**

-- a string of given bytes.
+- a string of given bytes.

Type: `String`.

@@ -98,11 +98,11 @@ Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order str

**Arguments**

-- `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+- `arg` — A value to convert to hexadecimal. Types: [String](../../sql-reference/data-types/string.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).

**Returned value**

-- A string with the hexadecimal representation of the argument.
+- A string with the hexadecimal representation of the argument. Type: [String](../../sql-reference/data-types/string.md). @@ -185,13 +185,13 @@ unhex(arg) **Arguments** -- `arg` — A string containing any number of hexadecimal digits. Type: [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). +- `arg` — A string containing any number of hexadecimal digits. Type: [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md). Supports both uppercase and lowercase letters `A-F`. The number of hexadecimal digits does not have to be even. If it is odd, the last digit is interpreted as the least significant half of the `00-0F` byte. If the argument string contains anything other than hexadecimal digits, some implementation-defined result is returned (an exception isn’t thrown). For a numeric argument the inverse of hex(N) is not performed by unhex(). **Returned value** -- A binary string (BLOB). +- A binary string (BLOB). Type: [String](../../sql-reference/data-types/string.md). @@ -247,11 +247,11 @@ Values of [UUID](../data-types/uuid.md) type are encoded as big-endian order str **Arguments** -- `arg` — A value to convert to binary. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md), or [DateTime](../../sql-reference/data-types/datetime.md). +- `arg` — A value to convert to binary. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md), [Decimal](../../sql-reference/data-types/decimal.md), [Date](../../sql-reference/data-types/date.md), or [DateTime](../../sql-reference/data-types/datetime.md). **Returned value** -- A string with the binary representation of the argument. +- A string with the binary representation of the argument. Type: [String](../../sql-reference/data-types/string.md). @@ -338,11 +338,11 @@ Supports binary digits `0` and `1`. The number of binary digits does not have to **Arguments** -- `arg` — A string containing any number of binary digits. [String](../../sql-reference/data-types/string.md). +- `arg` — A string containing any number of binary digits. [String](../../sql-reference/data-types/string.md). **Returned value** -- A binary string (BLOB). +- A binary string (BLOB). Type: [String](../../sql-reference/data-types/string.md). @@ -396,11 +396,11 @@ bitPositionsToArray(arg) **Arguments** -- `arg` — Integer value. [Int/UInt](../../sql-reference/data-types/int-uint.md). +- `arg` — Integer value. [Int/UInt](../../sql-reference/data-types/int-uint.md). **Returned value** -- An array containing a list of positions of bits that equal `1`, in ascending order. +- An array containing a list of positions of bits that equal `1`, in ascending order. Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). 
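As a quick illustration of `bitPositionsToArray` (a sketch, assuming the behavior documented above): `10` is `1010` in binary, so the 0-based bit positions 1 and 3 are set.

``` sql
-- 10 = 0b1010: bits at 0-based positions 1 and 3 are set
SELECT bitPositionsToArray(toUInt8(10)) AS bit_positions;
```

Result:

```
┌─bit_positions─┐
│ [1,3]         │
└───────────────┘
```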
diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md
index 1edb20fd9ed..1224b7bc92b 100644
--- a/docs/en/sql-reference/functions/encryption-functions.md
+++ b/docs/en/sql-reference/functions/encryption-functions.md
@@ -1,8 +1,7 @@
---
slug: /en/sql-reference/functions/encryption-functions
-sidebar_position: 67
+sidebar_position: 70
sidebar_label: Encryption
-title: "Encryption functions"
---

These functions implement encryption and decryption of data with AES (Advanced Encryption Standard) algorithm.

@@ -17,11 +16,11 @@ Note that these functions work slowly until ClickHouse 21.1.

This function encrypts data using these modes:

-- aes-128-ecb, aes-192-ecb, aes-256-ecb
-- aes-128-cbc, aes-192-cbc, aes-256-cbc
-- aes-128-ofb, aes-192-ofb, aes-256-ofb
-- aes-128-gcm, aes-192-gcm, aes-256-gcm
-- aes-128-ctr, aes-192-ctr, aes-256-ctr
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-ofb, aes-192-ofb, aes-256-ofb
+- aes-128-gcm, aes-192-gcm, aes-256-gcm
+- aes-128-ctr, aes-192-ctr, aes-256-ctr

**Syntax**

``` sql
encrypt('mode', 'plaintext', 'key' [, iv, aad])
```

**Arguments**

-- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
-- `plaintext` — Text thats need to be encrypted. [String](../../sql-reference/data-types/string.md#string).
-- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
-- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string).
-- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string).
+- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
+- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Encryption key. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
+- `aad` — Additional authenticated data. It isn't encrypted, but it affects decryption. Works only in `-gcm` modes; other modes throw an exception. [String](../../sql-reference/data-types/string.md#string).

**Returned value**

-- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
+- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).

**Examples**

@@ -112,9 +111,9 @@ Will produce the same ciphertext as `encrypt` on equal inputs. But when `key` or

Supported encryption modes:

-- aes-128-ecb, aes-192-ecb, aes-256-ecb
-- aes-128-cbc, aes-192-cbc, aes-256-cbc
-- aes-128-ofb, aes-192-ofb, aes-256-ofb
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-ofb, aes-192-ofb, aes-256-ofb

**Syntax**

``` sql
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
```

**Arguments**

-- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
-- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
-- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
-- `iv` — Initialization vector. Optional, only first 16 bytes are taken into account [String](../../sql-reference/data-types/string.md#string).
+- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
+- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Optional, only the first 16 bytes are taken into account. [String](../../sql-reference/data-types/string.md#string).

**Returned value**

@@ -217,11 +216,11 @@ mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviv

This function decrypts ciphertext into a plaintext using these modes:

-- aes-128-ecb, aes-192-ecb, aes-256-ecb
-- aes-128-cbc, aes-192-cbc, aes-256-cbc
-- aes-128-ofb, aes-192-ofb, aes-256-ofb
-- aes-128-gcm, aes-192-gcm, aes-256-gcm
-- aes-128-ctr, aes-192-ctr, aes-256-ctr
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-ofb, aes-192-ofb, aes-256-ofb
+- aes-128-gcm, aes-192-gcm, aes-256-gcm
+- aes-128-ctr, aes-192-ctr, aes-256-ctr

**Syntax**

``` sql
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
```

**Arguments**

-- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
-- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
-- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
-- `iv` — Initialization vector. Required for `-gcm` modes, optinal for others. [String](../../sql-reference/data-types/string.md#string).
-- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes, for others would throw an exception. [String](../../sql-reference/data-types/string.md#string).
+- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
+- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Required for `-gcm` modes, optional for others. [String](../../sql-reference/data-types/string.md#string).
+- `aad` — Additional authenticated data. Won't decrypt if this value is incorrect. Works only in `-gcm` modes; other modes throw an exception. [String](../../sql-reference/data-types/string.md#string).

**Returned value**

-- Decrypted String. [String](../../sql-reference/data-types/string.md#string).
+- Decrypted String. [String](../../sql-reference/data-types/string.md#string).

**Examples**

@@ -349,10 +348,10 @@ Will produce same plaintext as `decrypt` on equal inputs. But when `key` or `iv`

Supported decryption modes:

-- aes-128-ecb, aes-192-ecb, aes-256-ecb
-- aes-128-cbc, aes-192-cbc, aes-256-cbc
-- aes-128-cfb128
-- aes-128-ofb, aes-192-ofb, aes-256-ofb
+- aes-128-ecb, aes-192-ecb, aes-256-ecb
+- aes-128-cbc, aes-192-cbc, aes-256-cbc
+- aes-128-cfb128
+- aes-128-ofb, aes-192-ofb, aes-256-ofb

**Syntax**

``` sql
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
```

**Arguments**

-- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
-- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
-- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
-- `iv` — Initialization vector. Optinal. [String](../../sql-reference/data-types/string.md#string).
+- `mode` — Decryption mode. [String](../../sql-reference/data-types/string.md#string).
+- `ciphertext` — Encrypted text that needs to be decrypted. [String](../../sql-reference/data-types/string.md#string).
+- `key` — Decryption key. [String](../../sql-reference/data-types/string.md#string).
+- `iv` — Initialization vector. Optional. [String](../../sql-reference/data-types/string.md#string).

**Returned value**

-- Decrypted String. [String](../../sql-reference/data-types/string.md#string).
+- Decrypted String. [String](../../sql-reference/data-types/string.md#string).

**Examples**

diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md
index 07226b67601..7d8aa2c0390 100644
--- a/docs/en/sql-reference/functions/ext-dict-functions.md
+++ b/docs/en/sql-reference/functions/ext-dict-functions.md
@@ -1,6 +1,6 @@
---
slug: /en/sql-reference/functions/ext-dict-functions
-sidebar_position: 58
+sidebar_position: 50
sidebar_label: Dictionaries
---

@@ -24,16 +24,16 @@ dictGetOrNull('dict_name', attr_name, id_expr)

**Arguments**

-- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
-- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)).
-- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
-- `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../../sql-reference/data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute.
+- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
+- `attr_names` — Name of the column of the dictionary, [String literal](../../sql-reference/syntax.md#syntax-string-literal), or tuple of column names, [Tuple](../../sql-reference/data-types/tuple.md)([String literal](../../sql-reference/syntax.md#syntax-string-literal)).
+- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
+- `default_value_expr` — Values returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) or [Tuple](../../sql-reference/data-types/tuple.md)([Expression](../../sql-reference/syntax.md#syntax-expressions)), returning the value (or values) in the data types configured for the `attr_names` attribute.
**Returned value**

-- If ClickHouse parses the attribute successfully in the [attribute’s data type](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-attributes), functions return the value of the dictionary attribute that corresponds to `id_expr`.
+- If ClickHouse parses the attribute successfully in the [attribute’s data type](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-attributes), functions return the value of the dictionary attribute that corresponds to `id_expr`.

-- If there is no the key, corresponding to `id_expr`, in the dictionary, then:
+- If there is no key corresponding to `id_expr` in the dictionary, then:

  - `dictGet` returns the content of the `<null_value>` element specified for the attribute in the dictionary configuration.
  - `dictGetOrDefault` returns the value passed as the `default_value_expr` parameter.

@@ -226,7 +226,7 @@ Result:

**See Also**

-- [Dictionaries](../../sql-reference/dictionaries/index.md)
+- [Dictionaries](../../sql-reference/dictionaries/index.md)

## dictHas

@@ -238,13 +238,13 @@ dictHas('dict_name', id_expr)

**Arguments**

-- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
-- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
+- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
+- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning dictionary key-type value or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.

**Returned value**

-- 0, if there is no key.
-- 1, if there is a key.
+- 0, if there is no key.
+- 1, if there is a key.

Type: `UInt8`.

@@ -260,12 +260,12 @@ dictGetHierarchy('dict_name', key)

**Arguments**

-- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
-- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value.
+- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
+- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value.

**Returned value**

-- Parents for the key.
+- Parents for the key.

Type: [Array(UInt64)](../../sql-reference/data-types/array.md).

@@ -279,14 +279,14 @@ dictIsIn('dict_name', child_id_expr, ancestor_id_expr)

**Arguments**

-- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
-- `child_id_expr` — Key to be checked. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value.
-- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value.
+- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
+- `child_id_expr` — Key to be checked.
[Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `ancestor_id_expr` — Alleged ancestor of the `child_id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. **Returned value** -- 0, if `child_id_expr` is not a child of `ancestor_id_expr`. -- 1, if `child_id_expr` is a child of `ancestor_id_expr` or if `child_id_expr` is an `ancestor_id_expr`. +- 0, if `child_id_expr` is not a child of `ancestor_id_expr`. +- 1, if `child_id_expr` is a child of `ancestor_id_expr` or if `child_id_expr` is an `ancestor_id_expr`. Type: `UInt8`. @@ -302,12 +302,12 @@ dictGetChildren(dict_name, key) **Arguments** -- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. **Returned values** -- First-level descendants for the key. +- First-level descendants for the key. Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). @@ -351,13 +351,13 @@ dictGetDescendants(dict_name, key, level) **Arguments** -- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). -- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. -- `level` — Hierarchy level. If `level = 0` returns all descendants to the end. [UInt8](../../sql-reference/data-types/int-uint.md). +- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal). +- `key` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md)-type value. +- `level` — Hierarchy level. If `level = 0` returns all descendants to the end. [UInt8](../../sql-reference/data-types/int-uint.md). **Returned values** -- Descendants for the key. +- Descendants for the key. Type: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md)). @@ -409,14 +409,14 @@ ClickHouse supports specialized functions that convert dictionary attribute valu Functions: -- `dictGetInt8`, `dictGetInt16`, `dictGetInt32`, `dictGetInt64` -- `dictGetUInt8`, `dictGetUInt16`, `dictGetUInt32`, `dictGetUInt64` -- `dictGetFloat32`, `dictGetFloat64` -- `dictGetDate` -- `dictGetDateTime` -- `dictGetUUID` -- `dictGetString` -- `dictGetIPv4`, `dictGetIPv6` +- `dictGetInt8`, `dictGetInt16`, `dictGetInt32`, `dictGetInt64` +- `dictGetUInt8`, `dictGetUInt16`, `dictGetUInt32`, `dictGetUInt64` +- `dictGetFloat32`, `dictGetFloat64` +- `dictGetDate` +- `dictGetDateTime` +- `dictGetUUID` +- `dictGetString` +- `dictGetIPv4`, `dictGetIPv6` All these functions have the `OrDefault` modification. For example, `dictGetDateOrDefault`. @@ -429,16 +429,16 @@ dictGet[Type]OrDefault('dict_name', 'attr_name', id_expr, default_value_expr) **Arguments** -- `dict_name` — Name of the dictionary. 
[String literal](../../sql-reference/syntax.md#syntax-string-literal).
-- `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
-- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
-- `default_value_expr` — Value returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning the value in the data type configured for the `attr_name` attribute.
+- `dict_name` — Name of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
+- `attr_name` — Name of the column of the dictionary. [String literal](../../sql-reference/syntax.md#syntax-string-literal).
+- `id_expr` — Key value. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning a [UInt64](../../sql-reference/data-types/int-uint.md) or [Tuple](../../sql-reference/data-types/tuple.md)-type value depending on the dictionary configuration.
+- `default_value_expr` — Value returned if the dictionary does not contain a row with the `id_expr` key. [Expression](../../sql-reference/syntax.md#syntax-expressions) returning the value in the data type configured for the `attr_name` attribute.

**Returned value**

-- If ClickHouse parses the attribute successfully in the [attribute’s data type](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-attributes), functions return the value of the dictionary attribute that corresponds to `id_expr`.
+- If ClickHouse parses the attribute successfully in the [attribute’s data type](../../sql-reference/dictionaries/index.md#dictionary-key-and-fields#ext_dict_structure-attributes), functions return the value of the dictionary attribute that corresponds to `id_expr`.

-- If there is no requested `id_expr` in the dictionary then:
+- If there is no requested `id_expr` in the dictionary, then:

  - `dictGet[Type]` returns the content of the `<null_value>` element specified for the attribute in the dictionary configuration.
  - `dictGet[Type]OrDefault` returns the value passed as the `default_value_expr` parameter.

diff --git a/docs/en/sql-reference/functions/files.md b/docs/en/sql-reference/functions/files.md
index 91279cd991a..5cd2d8e0a74 100644
--- a/docs/en/sql-reference/functions/files.md
+++ b/docs/en/sql-reference/functions/files.md
@@ -1,13 +1,14 @@
---
slug: /en/sql-reference/functions/files
-sidebar_position: 43
+sidebar_position: 75
sidebar_label: Files
-title: "Functions for Working with Files"
---

## file

-Reads file as a String. The file content is not parsed, so any information is read as one string and placed into the specified column.
+Reads a file as a string and loads the data into the specified column. The actual file content is not interpreted.
+
+Also see table function [file](../table-functions/file.md).

**Syntax**

``` sql
file(path[, default])
```

**Arguments**

-- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following wildcards: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
-- `default` — The value that will be returned in the case when a file does not exist or cannot be accessed.
Data types supported: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). +- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports the following wildcards: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings. +- `default` — The value that will be returned in the case the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal). **Example** @@ -29,8 +30,3 @@ Query: ``` sql INSERT INTO table SELECT file('a.txt'), file('b.txt'); ``` - -**See Also** - -- [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path) -- [file](../table-functions/file.md) diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md index 7dc798aa6c1..0b7df54b776 100644 --- a/docs/en/sql-reference/functions/functions-for-nulls.md +++ b/docs/en/sql-reference/functions/functions-for-nulls.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/functions-for-nulls -sidebar_position: 63 +sidebar_position: 135 sidebar_label: Nullable --- @@ -18,12 +18,12 @@ Alias: `ISNULL`. **Arguments** -- `x` — A value with a non-compound data type. +- `x` — A value with a non-compound data type. **Returned value** -- `1` if `x` is `NULL`. -- `0` if `x` is not `NULL`. +- `1` if `x` is `NULL`. +- `0` if `x` is not `NULL`. **Example** @@ -58,12 +58,12 @@ isNotNull(x) **Arguments:** -- `x` — A value with a non-compound data type. +- `x` — A value with a non-compound data type. **Returned value** -- `0` if `x` is `NULL`. -- `1` if `x` is not `NULL`. +- `0` if `x` is `NULL`. +- `1` if `x` is not `NULL`. **Example** @@ -98,12 +98,12 @@ coalesce(x,...) **Arguments:** -- Any number of parameters of a non-compound type. All parameters must be compatible by data type. +- Any number of parameters of a non-compound type. All parameters must be compatible by data type. **Returned values** -- The first non-`NULL` argument. -- `NULL`, if all arguments are `NULL`. +- The first non-`NULL` argument. +- `NULL`, if all arguments are `NULL`. **Example** @@ -141,13 +141,13 @@ ifNull(x,alt) **Arguments:** -- `x` — The value to check for `NULL`. -- `alt` — The value that the function returns if `x` is `NULL`. +- `x` — The value to check for `NULL`. +- `alt` — The value that the function returns if `x` is `NULL`. **Returned values** -- The value `x`, if `x` is not `NULL`. -- The value `alt`, if `x` is `NULL`. +- The value `x`, if `x` is not `NULL`. +- The value `alt`, if `x` is `NULL`. **Example** @@ -185,8 +185,8 @@ nullIf(x, y) **Returned values** -- `NULL`, if the arguments are equal. -- The `x` value, if the arguments are not equal. +- `NULL`, if the arguments are equal. +- The `x` value, if the arguments are not equal. **Example** @@ -220,12 +220,12 @@ assumeNotNull(x) **Arguments:** -- `x` — The original value. +- `x` — The original value. **Returned values** -- The original value from the non-`Nullable` type, if it is not `NULL`. -- Implementation specific result if the original value was `NULL`. +- The original value from the non-`Nullable` type, if it is not `NULL`. +- Implementation specific result if the original value was `NULL`. 
**Example**

@@ -282,11 +282,11 @@ toNullable(x)

**Arguments:**

-- `x` — The value of any non-compound type.
+- `x` — The value of any non-compound type.

**Returned value**

-- The input value with a `Nullable` type.
+- The input value with a `Nullable` type.

**Example**

diff --git a/docs/en/sql-reference/functions/geo/coordinates.md b/docs/en/sql-reference/functions/geo/coordinates.md
index 01802e336bf..1cbc1933206 100644
--- a/docs/en/sql-reference/functions/geo/coordinates.md
+++ b/docs/en/sql-reference/functions/geo/coordinates.md
@@ -15,10 +15,10 @@ greatCircleDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg)

**Input parameters**

-- `lon1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`.
-- `lat1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`.
-- `lon2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`.
-- `lat2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`.
+- `lon1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`.
+- `lat1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`.
+- `lon2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`.
+- `lat2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`.

Positive values correspond to North latitude and East longitude, and negative values correspond to South latitude and West longitude.

@@ -53,10 +53,10 @@ geoDistance(lon1Deg, lat1Deg, lon2Deg, lat2Deg)

**Input parameters**

-- `lon1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`.
-- `lat1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`.
-- `lon2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`.
-- `lat2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`.
+- `lon1Deg` — Longitude of the first point in degrees. Range: `[-180°, 180°]`.
+- `lat1Deg` — Latitude of the first point in degrees. Range: `[-90°, 90°]`.
+- `lon2Deg` — Longitude of the second point in degrees. Range: `[-180°, 180°]`.
+- `lat2Deg` — Latitude of the second point in degrees. Range: `[-90°, 90°]`.

Positive values correspond to North latitude and East longitude, and negative values correspond to South latitude and West longitude.

@@ -88,10 +88,10 @@ greatCircleAngle(lon1Deg, lat1Deg, lon2Deg, lat2Deg)

**Input parameters**

-- `lon1Deg` — Longitude of the first point in degrees.
-- `lat1Deg` — Latitude of the first point in degrees.
-- `lon2Deg` — Longitude of the second point in degrees.
-- `lat2Deg` — Latitude of the second point in degrees.
+- `lon1Deg` — Longitude of the first point in degrees.
+- `lat1Deg` — Latitude of the first point in degrees.
+- `lon2Deg` — Longitude of the second point in degrees.
+- `lat2Deg` — Latitude of the second point in degrees.

**Returned value**

@@ -120,9 +120,9 @@ pointInEllipses(x, y, x₀, y₀, a₀, b₀,...,xₙ, yₙ, aₙ, bₙ)

**Input parameters**

-- `x, y` — Coordinates of a point on the plane.
-- `xᵢ, yᵢ` — Coordinates of the center of the `i`-th ellipsis.
-- `aᵢ, bᵢ` — Axes of the `i`-th ellipsis in units of x, y coordinates.
+- `x, y` — Coordinates of a point on the plane.
+- `xᵢ, yᵢ` — Coordinates of the center of the `i`-th ellipse.
+- `aᵢ, bᵢ` — Axes of the `i`-th ellipse in units of x, y coordinates.

The input parameters must be `2+4⋅n`, where `n` is the number of ellipses.

@@ -152,9 +152,9 @@ pointInPolygon((x, y), [(a, b), (c, d) ...], ...)

**Input values**

-- `(x, y)` — Coordinates of a point on the plane.
Data type — [Tuple](../../../sql-reference/data-types/tuple.md) — A tuple of two numbers. -- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../../sql-reference/data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant. -- The function also supports polygons with holes (cut out sections). In this case, add polygons that define the cut out sections using additional arguments of the function. The function does not support non-simply-connected polygons. +- `(x, y)` — Coordinates of a point on the plane. Data type — [Tuple](../../../sql-reference/data-types/tuple.md) — A tuple of two numbers. +- `[(a, b), (c, d) ...]` — Polygon vertices. Data type — [Array](../../../sql-reference/data-types/array.md). Each vertex is represented by a pair of coordinates `(a, b)`. Vertices should be specified in a clockwise or counterclockwise order. The minimum number of vertices is 3. The polygon must be constant. +- The function also supports polygons with holes (cut out sections). In this case, add polygons that define the cut out sections using additional arguments of the function. The function does not support non-simply-connected polygons. **Returned values** diff --git a/docs/en/sql-reference/functions/geo/geohash.md b/docs/en/sql-reference/functions/geo/geohash.md index baed8243935..ce16af44e90 100644 --- a/docs/en/sql-reference/functions/geo/geohash.md +++ b/docs/en/sql-reference/functions/geo/geohash.md @@ -18,13 +18,13 @@ geohashEncode(longitude, latitude, [precision]) **Input values** -- longitude - longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]` -- latitude - latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]` -- precision - Optional, length of the resulting encoded string, defaults to `12`. Integer in range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`. +- longitude - longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]` +- latitude - latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]` +- precision - Optional, length of the resulting encoded string, defaults to `12`. Integer in range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`. **Returned values** -- alphanumeric `String` of encoded coordinate (modified version of the base32-encoding alphabet is used). +- alphanumeric `String` of encoded coordinate (modified version of the base32-encoding alphabet is used). **Example** @@ -44,11 +44,11 @@ Decodes any [geohash](#geohash)-encoded string into longitude and latitude. **Input values** -- encoded string - geohash-encoded string. +- encoded string - geohash-encoded string. **Returned values** -- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude. +- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude. **Example** @@ -74,11 +74,11 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi **Arguments** -- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. 
Type: [Float](../../../sql-reference/data-types/float.md). -- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). -- `precision` — Geohash precision. Range: `[1, 12]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `longitude_min` — Minimum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). +- `latitude_min` — Minimum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). +- `longitude_max` — Maximum longitude. Range: `[-180°, 180°]`. Type: [Float](../../../sql-reference/data-types/float.md). +- `latitude_max` — Maximum latitude. Range: `[-90°, 90°]`. Type: [Float](../../../sql-reference/data-types/float.md). +- `precision` — Geohash precision. Range: `[1, 12]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). :::note All coordinate parameters must be of the same type: either `Float32` or `Float64`. @@ -86,8 +86,8 @@ All coordinate parameters must be of the same type: either `Float32` or `Float64 **Returned values** -- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. -- `[]` - Empty array if minimum latitude and longitude values aren’t less than corresponding maximum values. +- Array of precision-long strings of geohash-boxes covering provided area, you should not rely on order of items. +- `[]` - Empty array if minimum latitude and longitude values aren’t less than corresponding maximum values. Type: [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)). diff --git a/docs/en/sql-reference/functions/geo/h3.md b/docs/en/sql-reference/functions/geo/h3.md index ce15fed0f21..1f695a13598 100644 --- a/docs/en/sql-reference/functions/geo/h3.md +++ b/docs/en/sql-reference/functions/geo/h3.md @@ -26,12 +26,12 @@ h3IsValid(h3index) **Parameter** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- 1 — The number is a valid H3 index. -- 0 — The number is not a valid H3 index. +- 1 — The number is a valid H3 index. +- 0 — The number is not a valid H3 index. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -63,12 +63,12 @@ h3GetResolution(h3index) **Parameter** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Index resolution. Range: `[0, 15]`. -- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. +- Index resolution. Range: `[0, 15]`. +- If the index is not valid, the function returns a random value. Use [h3IsValid](#h3isvalid) to verify the index. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -100,11 +100,11 @@ h3EdgeAngle(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in grades. Type: [Float64](../../../sql-reference/data-types/float.md). 
+- The average length of the [H3](#h3index) hexagon edge in grades. Type: [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -134,11 +134,11 @@ h3EdgeLengthM(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in meters. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in meters. Type: [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -168,11 +168,11 @@ h3EdgeLengthKm(resolution) **Parameter** -- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. +- `resolution` — Index resolution. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). Range: `[0, 15]`. **Returned values** -- The average length of the [H3](#h3index) hexagon edge in kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). +- The average length of the [H3](#h3index) hexagon edge in kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -202,14 +202,14 @@ geoToH3(lon, lat, resolution) **Arguments** -- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Hexagon index number. -- 0 in case of error. +- Hexagon index number. +- 0 in case of error. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -241,11 +241,11 @@ h3ToGeo(h3Index) **Arguments** -- `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). +- A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -275,11 +275,11 @@ h3ToGeoBoundary(h3Index) **Arguments** -- `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array of pairs '(lon, lat)'. +- Array of pairs '(lon, lat)'. Type: [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). @@ -311,12 +311,12 @@ h3kRing(h3index, k) **Arguments** -- `h3index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Radius. Type: [integer](../../../sql-reference/data-types/int-uint.md) +- `h3index` — Hexagon index number. 
Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `k` — Radius. Type: [integer](../../../sql-reference/data-types/int-uint.md) **Returned values** -- Array of H3 indexes. +- Array of H3 indexes. Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). @@ -354,11 +354,11 @@ h3GetBaseCell(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Hexagon base cell number. +- Hexagon base cell number. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -390,11 +390,11 @@ h3HexAreaM2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Area in square meters. +- Area in square meters. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -426,11 +426,11 @@ h3HexAreaKm2(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Area in square kilometers. +- Area in square kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -462,13 +462,13 @@ h3IndexesAreNeighbors(index1, index2) **Arguments** -- `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index1` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index2` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- `1` — Indexes are neighbours. -- `0` — Indexes are not neighbours. +- `1` — Indexes are neighbours. +- `0` — Indexes are not neighbours. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -500,12 +500,12 @@ h3ToChildren(index, resolution) **Arguments** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array of the child H3-indexes. +- Array of the child H3-indexes. Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). @@ -537,12 +537,12 @@ h3ToParent(index, resolution) **Arguments** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Parent H3 index. +- Parent H3 index. 
Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -572,11 +572,11 @@ h3ToString(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- String representation of the H3 index. +- String representation of the H3 index. Type: [String](../../../sql-reference/data-types/string.md). @@ -608,11 +608,11 @@ stringToH3(index_str) **Parameter** -- `index_str` — String representation of the H3 index. Type: [String](../../../sql-reference/data-types/string.md). +- `index_str` — String representation of the H3 index. Type: [String](../../../sql-reference/data-types/string.md). **Returned value** -- Hexagon index number. Returns 0 on error. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- Hexagon index number. Returns 0 on error. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -642,11 +642,11 @@ h3GetResolution(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Example** @@ -676,12 +676,12 @@ h3IsResClassIII(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- `1` — Index has a resolution with Class III orientation. -- `0` — Index doesn't have a resolution with Class III orientation. +- `1` — Index has a resolution with Class III orientation. +- `0` — Index doesn't have a resolution with Class III orientation. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -713,12 +713,12 @@ h3IsPentagon(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- `1` — Index represents a pentagonal cell. -- `0` — Index doesn't represent a pentagonal cell. +- `1` — Index represents a pentagonal cell. +- `0` — Index doesn't represent a pentagonal cell. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -750,11 +750,11 @@ h3GetFaces(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array containing icosahedron faces intersected by a given H3 index. +- Array containing icosahedron faces intersected by a given H3 index. Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). @@ -786,11 +786,11 @@ h3CellAreaM2(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Cell area in square meters. +- Cell area in square meters. Type: [Float64](../../../sql-reference/data-types/float.md). 
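For illustration, a sketch combining `geoToH3` with `h3CellAreaM2`, per the signatures documented above (the coordinates below are arbitrary, and the numeric result depends on the cell, so it is not reproduced here):

``` sql
-- Resolve an arbitrary point (lon, lat) to a resolution-10 cell, then get its area
SELECT h3CellAreaM2(geoToH3(-122.4194, 37.7749, 10)) AS area_m2;
```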
@@ -822,11 +822,11 @@ h3CellAreaRads2(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Cell area in square radians. +- Cell area in square radians. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -858,8 +858,8 @@ h3ToCenterChild(index, resolution) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned values** @@ -895,11 +895,11 @@ h3ExactEdgeLengthM(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Exact edge length in meters. +- Exact edge length in meters. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -931,11 +931,11 @@ h3ExactEdgeLengthKm(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Exact edge length in kilometers. +- Exact edge length in kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -967,11 +967,11 @@ h3ExactEdgeLengthRads(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Exact edge length in radians. +- Exact edge length in radians. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -1003,11 +1003,11 @@ h3NumHexagons(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Number of H3 indices. +- Number of H3 indices. Type: [Int64](../../../sql-reference/data-types/int-uint.md). @@ -1039,12 +1039,12 @@ h3PointDistM(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). **Returned values** -- Haversine or great circle distance in meters. +- Haversine or great circle distance in meters. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -1076,12 +1076,12 @@ h3PointDistKm(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). 
-- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). **Returned values** -- Haversine or great circle distance in kilometers. +- Haversine or great circle distance in kilometers. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -1113,12 +1113,12 @@ h3PointDistRads(lat1, lon1, lat2, lon2) **Arguments** -- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). -- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat1`, `lon1` — Latitude and Longitude of point1 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). +- `lat2`, `lon2` — Latitude and Longitude of point2 in degrees. Type: [Float64](../../../sql-reference/data-types/float.md). **Returned values** -- Haversine or great circle distance in radians. +- Haversine or great circle distance in radians. Type: [Float64](../../../sql-reference/data-types/float.md). @@ -1150,7 +1150,7 @@ h3GetRes0Indexes() **Returned values** -- Array of all the resolution 0 H3 indexes. +- Array of all the resolution 0 H3 indexes. Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). @@ -1183,11 +1183,11 @@ h3GetPentagonIndexes(resolution) **Parameter** -- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). +- `resolution` — Index resolution. Range: `[0, 15]`. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Array of all pentagon H3 indexes. +- Array of all pentagon H3 indexes. Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). @@ -1219,8 +1219,8 @@ h3Line(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** @@ -1256,12 +1256,12 @@ h3Distance(start,end) **Parameter** -- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `start` — Hexagon index number that represents a starting point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `end` — Hexagon index number that represents an ending point. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Number of grid cells. +- Number of grid cells. Type: [Int64](../../../sql-reference/data-types/int-uint.md). @@ -1297,12 +1297,12 @@ h3HexRing(index, k) **Parameter** -- `index` — Hexagon index number that represents the origin. 
Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `k` — Distance. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents the origin. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `k` — Distance. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- Array of H3 indexes. +- Array of H3 indexes. Type: [Array](../../../sql-reference/data-types/array.md)([UInt64](../../../sql-reference/data-types/int-uint.md)). @@ -1334,12 +1334,12 @@ h3GetUnidirectionalEdge(originIndex, destinationIndex) **Parameter** -- `originIndex` — Origin Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `destinationIndex` — Destination Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `originIndex` — Origin Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `destinationIndex` — Destination Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Unidirectional Edge Hexagon Index number. +- Unidirectional Edge Hexagon Index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -1371,12 +1371,12 @@ h3UnidirectionalEdgeisValid(index) **Parameter** -- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- 1 — The H3 index is a valid unidirectional edge. -- 0 — The H3 index is not a valid unidirectional edge. +- 1 — The H3 index is a valid unidirectional edge. +- 0 — The H3 index is not a valid unidirectional edge. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -1408,11 +1408,11 @@ h3GetOriginIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Origin Hexagon Index number. +- Origin Hexagon Index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -1444,11 +1444,11 @@ h3GetDestinationIndexFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Destination Hexagon Index number. +- Destination Hexagon Index number. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -1480,7 +1480,7 @@ h3GetIndexesFromUnidirectionalEdge(edge) **Parameter** -- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `edge` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** @@ -1519,7 +1519,7 @@ h3GetUnidirectionalEdgesFromHexagon(index) **Parameter** -- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents a unidirectional edge. 
Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** @@ -1555,11 +1555,11 @@ h3GetUnidirectionalEdgeBoundary(index) **Parameter** -- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `index` — Hexagon index number that represents a unidirectional edge. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned value** -- Array of pairs '(lon, lat)'. +- Array of pairs '(lon, lat)'. Type: [Array](../../../sql-reference/data-types/array.md)([Float64](../../../sql-reference/data-types/float.md), [Float64](../../../sql-reference/data-types/float.md)). diff --git a/docs/en/sql-reference/functions/geo/index.md b/docs/en/sql-reference/functions/geo/index.md index 3d5ca806c7e..d46e60281e2 100644 --- a/docs/en/sql-reference/functions/geo/index.md +++ b/docs/en/sql-reference/functions/geo/index.md @@ -8,67 +8,67 @@ title: "Geo Functions" ## Geographical Coordinates Functions -- [greatCircleDistance](./coordinates.md#greatcircledistance) -- [geoDistance](./coordinates.md#geodistance) -- [greatCircleAngle](./coordinates.md#greatcircleangle) -- [pointInEllipses](./coordinates.md#pointinellipses) -- [pointInPolygon](./coordinates.md#pointinpolygon) +- [greatCircleDistance](./coordinates.md#greatcircledistance) +- [geoDistance](./coordinates.md#geodistance) +- [greatCircleAngle](./coordinates.md#greatcircleangle) +- [pointInEllipses](./coordinates.md#pointinellipses) +- [pointInPolygon](./coordinates.md#pointinpolygon) ## Geohash Functions -- [geohashEncode](./geohash.md#geohashencode) -- [geohashDecode](./geohash.md#geohashdecode) -- [geohashesInBox](./geohash.md#geohashesinbox) +- [geohashEncode](./geohash.md#geohashencode) +- [geohashDecode](./geohash.md#geohashdecode) +- [geohashesInBox](./geohash.md#geohashesinbox) ## H3 Indexes Functions -- [h3IsValid](./h3.md#h3isvalid) -- [h3GetResolution](./h3.md#h3getresolution) -- [h3EdgeAngle](./h3.md#h3edgeangle) -- [h3EdgeLengthM](./h3.md#h3edgelengthm) -- [h3EdgeLengthKm](./h3.md#h3edgelengthkm) -- [geoToH3](./h3.md#geotoh3) -- [h3ToGeo](./h3.md#h3togeo) -- [h3ToGeoBoundary](./h3.md#h3togeoboundary) -- [h3kRing](./h3.md#h3kring) -- [h3GetBaseCell](./h3.md#h3getbasecell) -- [h3HexAreaM2](./h3.md#h3hexaream2) -- [h3HexAreaKm2](./h3.md#h3hexareakm2) -- [h3IndexesAreNeighbors](./h3.md#h3indexesareneighbors) -- [h3ToChildren](./h3.md#h3tochildren) -- [h3ToParent](./h3.md#h3toparent) -- [h3ToString](./h3.md#h3tostring) -- [stringToH3](./h3.md#stringtoh3) -- [h3GetResolution](./h3.md#h3getresolution) -- [h3IsResClassIII](./h3.md#h3isresclassiii) -- [h3IsPentagon](./h3.md#h3ispentagon) -- [h3GetFaces](./h3.md#h3getfaces) -- [h3CellAreaM2](./h3.md#h3cellaream2) -- [h3CellAreaRads2](./h3.md#h3cellarearads2) -- [h3ToCenterChild](./h3.md#h3tocenterchild) -- [h3ExactEdgeLengthM](./h3.md#h3exactedgelengthm) -- [h3ExactEdgeLengthKm](./h3.md#h3exactedgelengthkm) -- [h3ExactEdgeLengthRads](./h3.md#h3exactedgelengthrads) -- [h3NumHexagons](./h3.md#h3numhexagons) -- [h3Line](./h3.md#h3line) -- [h3Distance](./h3.md#h3distance) -- [h3HexRing](./h3.md#h3hexring) -- [h3GetUnidirectionalEdge](./h3.md#h3getunidirectionaledge) -- [h3UnidirectionalEdgeIsValid](./h3.md#h3unidirectionaledgeisvalid) -- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3getoriginindexfromunidirectionaledge) -- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3getdestinationindexfromunidirectionaledge) -- 
[h3GetIndexesFromUnidirectionalEdge](./h3.md#h3getindexesfromunidirectionaledge) -- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3getunidirectionaledgesfromhexagon) -- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3getunidirectionaledgeboundary) +- [h3IsValid](./h3.md#h3isvalid) +- [h3GetResolution](./h3.md#h3getresolution) +- [h3EdgeAngle](./h3.md#h3edgeangle) +- [h3EdgeLengthM](./h3.md#h3edgelengthm) +- [h3EdgeLengthKm](./h3.md#h3edgelengthkm) +- [geoToH3](./h3.md#geotoh3) +- [h3ToGeo](./h3.md#h3togeo) +- [h3ToGeoBoundary](./h3.md#h3togeoboundary) +- [h3kRing](./h3.md#h3kring) +- [h3GetBaseCell](./h3.md#h3getbasecell) +- [h3HexAreaM2](./h3.md#h3hexaream2) +- [h3HexAreaKm2](./h3.md#h3hexareakm2) +- [h3IndexesAreNeighbors](./h3.md#h3indexesareneighbors) +- [h3ToChildren](./h3.md#h3tochildren) +- [h3ToParent](./h3.md#h3toparent) +- [h3ToString](./h3.md#h3tostring) +- [stringToH3](./h3.md#stringtoh3) +- [h3GetResolution](./h3.md#h3getresolution) +- [h3IsResClassIII](./h3.md#h3isresclassiii) +- [h3IsPentagon](./h3.md#h3ispentagon) +- [h3GetFaces](./h3.md#h3getfaces) +- [h3CellAreaM2](./h3.md#h3cellaream2) +- [h3CellAreaRads2](./h3.md#h3cellarearads2) +- [h3ToCenterChild](./h3.md#h3tocenterchild) +- [h3ExactEdgeLengthM](./h3.md#h3exactedgelengthm) +- [h3ExactEdgeLengthKm](./h3.md#h3exactedgelengthkm) +- [h3ExactEdgeLengthRads](./h3.md#h3exactedgelengthrads) +- [h3NumHexagons](./h3.md#h3numhexagons) +- [h3Line](./h3.md#h3line) +- [h3Distance](./h3.md#h3distance) +- [h3HexRing](./h3.md#h3hexring) +- [h3GetUnidirectionalEdge](./h3.md#h3getunidirectionaledge) +- [h3UnidirectionalEdgeIsValid](./h3.md#h3unidirectionaledgeisvalid) +- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3getoriginindexfromunidirectionaledge) +- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3getdestinationindexfromunidirectionaledge) +- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3getindexesfromunidirectionaledge) +- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3getunidirectionaledgesfromhexagon) +- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3getunidirectionaledgeboundary) ## S2 Index Functions -- [geoToS2](./s2.md#geotos2) -- [s2ToGeo](./s2.md#s2togeo) -- [s2GetNeighbors](./s2.md#s2getneighbors) -- [s2CellsIntersect](./s2.md#s2cellsintersect) -- [s2CapContains](./s2.md#s2capcontains) -- [s2CapUnion](./s2.md#s2capunion) -- [s2RectAdd](./s2.md#s2rectadd) -- [s2RectContains](./s2.md#s2rectcontains) -- [s2RectUnion](./s2.md#s2rectunion) -- [s2RectIntersection](./s2.md#s2rectintersection) +- [geoToS2](./s2.md#geotos2) +- [s2ToGeo](./s2.md#s2togeo) +- [s2GetNeighbors](./s2.md#s2getneighbors) +- [s2CellsIntersect](./s2.md#s2cellsintersect) +- [s2CapContains](./s2.md#s2capcontains) +- [s2CapUnion](./s2.md#s2capunion) +- [s2RectAdd](./s2.md#s2rectadd) +- [s2RectContains](./s2.md#s2rectcontains) +- [s2RectUnion](./s2.md#s2rectunion) +- [s2RectIntersection](./s2.md#s2rectintersection) diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md index 3cd66cfaaeb..24a9ac53f4d 100644 --- a/docs/en/sql-reference/functions/geo/s2.md +++ b/docs/en/sql-reference/functions/geo/s2.md @@ -21,12 +21,12 @@ geoToS2(lon, lat) **Arguments** -- `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). -- `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). +- `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). +- `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md). **Returned values** -- S2 point index. 
+- S2 point index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -58,11 +58,11 @@ s2ToGeo(s2index) **Arguments** -- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- A tuple consisting of two values: `tuple(lon,lat)`. +- A tuple consisting of two values: `tuple(lon,lat)`. Type: `lon` — [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md). @@ -94,11 +94,11 @@ s2GetNeighbors(s2index) **Arguments** -- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. +- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). @@ -130,12 +130,12 @@ s2CellsIntersect(s2index1, s2index2) **Arguments** -- `siIndex1`, `s2index2` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2index1`, `s2index2` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- 1 — If the cells intersect. -- 0 — If the cells don't intersect. +- 1 — If the cells intersect. +- 0 — If the cells don't intersect. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -167,14 +167,14 @@ s2CapContains(center, degrees, point) **Arguments** -- `center` — S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `degrees` — Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md). -- `point` — S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `center` — S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `degrees` — Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `point` — S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- 1 — If the cap contains the S2 point index. -- 0 — If the cap doesn't contain the S2 point index. +- 1 — If the cap contains the S2 point index. +- 0 — If the cap doesn't contain the S2 point index. Type: [UInt8](../../../sql-reference/data-types/int-uint.md). @@ -206,13 +206,13 @@ s2CapUnion(center1, radius1, center2, radius2) **Arguments** -- `center1`, `center2` — S2 point indixes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md). +- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `radius1`, `radius2` — Radius of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md). **Returned values** -- `center` — S2 point index corresponding the center of the smallest cap containing the two input caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `radius` — Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md). +- `center` — S2 point index corresponding to the center of the smallest cap containing the two input caps. 
Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `radius` — Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md). **Example** @@ -242,14 +242,14 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point) **Arguments** -- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Point` — Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — Hight S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md). +- `s2PointLow` — Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointHigh` — High S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -279,14 +279,14 @@ s2RectContains(s2PointLow, s2PointHi, s2Point) **Arguments** -- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Point` — Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointLow` — Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2PointHigh` — High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Point` — Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- 1 — If the rectangle contains the given S2 point. -- 0 — If the rectangle doesn't contain the given S2 point. +- 1 — If the rectangle contains the given S2 point. +- 0 — If the rectangle doesn't contain the given S2 point. **Example** @@ -316,13 +316,13 @@ s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi) **Arguments** -- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). 
**Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointHi` — High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** @@ -352,13 +352,13 @@ s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2Poin **Arguments** -- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md). **Returned values** -- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). -- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointLow` — Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). +- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). **Example** diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 2943ba13861..1f471e99255 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/hash-functions -sidebar_position: 50 +sidebar_position: 85 sidebar_label: Hash --- @@ -335,11 +335,11 @@ Even in these cases, we recommend applying the function offline and pre-calculat **Arguments** -- `s` — Input string for SHA hash calculation. [String](/docs/en/sql-reference/data-types/string.md). +- `s` — Input string for SHA hash calculation. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). +- SHA hash as a hex-unencoded FixedString. SHA-1 returns as FixedString(20), SHA-224 as FixedString(28), SHA-256 — FixedString(32), SHA-512 — FixedString(64). Type: [FixedString](/docs/en/sql-reference/data-types/fixedstring.md). 
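Because the SHA family returns raw bytes in a `FixedString`, display usually goes through `hex()`. A small illustration using the standard SHA-256 test vector for `'abc'` (the digest in the comment is the published test-vector value, not output captured from this build):

```sql
-- SHA256 returns FixedString(32) of raw bytes; hex() makes it printable.
SELECT hex(SHA256('abc')) AS digest;
-- BA7816BF8F01CFEA414140DE5DAE2223B00361A396177A9CB410FF61F20015AD
```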
@@ -441,11 +441,11 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0 ## javaHash -Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452), -[Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405), -[Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410), -[Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959), -[Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060). +Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452), +[Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405), +[Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410), +[Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959), +[Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060). This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result. Note that Java only support calculating signed integers hash, so if you want to calculate unsigned integers hash you must cast it to proper signed ClickHouse types. @@ -502,7 +502,7 @@ javaHashUTF16LE(stringUtf16le) **Arguments** -- `stringUtf16le` — a string in UTF-16LE encoding. +- `stringUtf16le` — a string in UTF-16LE encoding. **Returned value** @@ -607,8 +607,8 @@ Both functions take a variable number of input parameters. Arguments can be any **Returned Value** -- The `murmurHash2_32` function returns hash value having the [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type. -- The `murmurHash2_64` function returns hash value having the [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type. +- The `murmurHash2_32` function returns hash value having the [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type. +- The `murmurHash2_64` function returns hash value having the [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type. **Example** @@ -634,11 +634,11 @@ gccMurmurHash(par1, ...) **Arguments** -- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). **Returned value** -- Calculated hash value. +- Calculated hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). 
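For the `murmurHash2_*` signatures touched above, both functions accept a variable number of arguments of any supported type and hash them together per row. A hedged sketch (the argument values are arbitrary; result values are omitted rather than invented):

```sql
-- murmurHash2_32 yields UInt32, murmurHash2_64 yields UInt64;
-- mixed argument types are combined into a single hash per row.
SELECT
    murmurHash2_32('example') AS h32,
    murmurHash2_64('example', 42, today()) AS h64;
```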
@@ -660,6 +660,45 @@ Result: └──────────────────────┴─────────────────────┘ ``` + +## kafkaMurmurHash + +Calculates a 32-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value using the same hash seed as [Kafka](https://github.com/apache/kafka/blob/461c5cfe056db0951d9b74f5adc45973670404d7/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L482) and without the highest bit to be compatible with [Default Partitioner](https://github.com/apache/kafka/blob/139f7709bd3f5926901a21e55043388728ccca78/clients/src/main/java/org/apache/kafka/clients/producer/internals/BuiltInPartitioner.java#L328). + +**Syntax** + +```sql +kafkaMurmurHash(par1, ...) +``` + +**Arguments** + +- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). + +**Returned value** + +- Calculated hash value. + +Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT + kafkaMurmurHash('foobar') AS res1, + kafkaMurmurHash(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS res2 +``` + +Result: + +```response +┌───────res1─┬─────res2─┐ │ 1357151166 │ 85479775 │ └────────────┴──────────┘ +``` + ## murmurHash3_32, murmurHash3_64 Produces a [MurmurHash3](https://github.com/aappleby/smhasher) hash value. @@ -675,8 +714,8 @@ Both functions take a variable number of input parameters. Arguments can be any **Returned Value** -- The `murmurHash3_32` function returns a [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. -- The `murmurHash3_64` function returns a [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +- The `murmurHash3_32` function returns a [UInt32](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. +- The `murmurHash3_64` function returns a [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. **Example** @@ -702,7 +741,7 @@ murmurHash3_128(expr) **Arguments** -- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions). [String](/docs/en/sql-reference/data-types/string.md). +- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions). [String](/docs/en/sql-reference/data-types/string.md). **Returned value** @@ -738,7 +777,7 @@ xxh3(expr) **Arguments** -- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions) of any data type. +- `expr` — A list of [expressions](/docs/en/sql-reference/syntax.md/#syntax-expressions) of any data type. **Returned value** @@ -798,7 +837,7 @@ Result: **See Also** -- [xxHash](http://cyan4973.github.io/xxHash/). +- [xxHash](http://cyan4973.github.io/xxHash/). ## ngramSimHash @@ -814,12 +853,12 @@ ngramSimHash(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). 
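`ngramSimHash` is a locality-sensitive hash, so near-duplicate strings should produce values that are close in Hamming distance. A sketch pairing it with `bitHammingDistance`, the comparison function these simhashes are meant to be used with (the inputs are arbitrary and the exact distance is not asserted here):

```sql
-- Near-duplicate inputs are expected to give a small Hamming distance
-- between their simhashes; unrelated inputs, a large one.
SELECT bitHammingDistance(
    ngramSimHash('ClickHouse is a column-oriented DBMS'),
    ngramSimHash('ClickHouse is a column-oriented DBMS!')
) AS dist;
```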
@@ -853,12 +892,12 @@ ngramSimHashCaseInsensitive(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -892,12 +931,12 @@ ngramSimHashUTF8(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -931,12 +970,12 @@ ngramSimHashCaseInsensitiveUTF8(string[, ngramsize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -970,12 +1009,12 @@ wordShingleSimHash(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -1009,12 +1048,12 @@ wordShingleSimHashCaseInsensitive(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). 
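The `CaseInsensitive` variants above differ from their base functions only in how letter case is treated, which a single equality check can demonstrate. A minimal sketch, assuming the variant normalizes case as its name states:

```sql
-- The case-insensitive simhash should not distinguish these two inputs,
-- so the comparison is expected to return 1.
SELECT
    wordShingleSimHashCaseInsensitive('ClickHouse Documentation') =
    wordShingleSimHashCaseInsensitive('clickhouse documentation') AS same;
```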
@@ -1048,12 +1087,12 @@ wordShingleSimHashUTF8(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optinal. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -1087,12 +1126,12 @@ wordShingleSimHashCaseInsensitiveUTF8(string[, shinglesize]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). @@ -1126,13 +1165,13 @@ ngramMinHash(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1166,13 +1205,13 @@ ngramMinHashCaseInsensitive(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. 
[UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1206,13 +1245,13 @@ ngramMinHashUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1246,13 +1285,13 @@ ngramMinHashCaseInsensitiveUTF8(string [, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1284,13 +1323,13 @@ ngramMinHashArg(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. 
[UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. +- Tuple with two tuples with `hashnum` n-grams each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). @@ -1322,13 +1361,13 @@ ngramMinHashArgCaseInsensitive(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. +- Tuple with two tuples with `hashnum` n-grams each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). @@ -1360,13 +1399,13 @@ ngramMinHashArgUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. 
[UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. +- Tuple with two tuples with `hashnum` n-grams each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). @@ -1398,13 +1437,13 @@ ngramMinHashArgCaseInsensitiveUTF8(string[, ngramsize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `ngramsize` — The size of an n-gram. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` n-grams each. +- Tuple with two tuples with `hashnum` n-grams each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). @@ -1438,13 +1477,13 @@ wordShingleMinHash(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1478,13 +1517,13 @@ wordShingleMinHashCaseInsensitive(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. 
[UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1518,13 +1557,13 @@ wordShingleMinHashUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. +- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1558,13 +1597,13 @@ wordShingleMinHashCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two hashes — the minimum and the maximum. 
+- Tuple with two hashes — the minimum and the maximum. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([UInt64](/docs/en/sql-reference/data-types/int-uint.md), [UInt64](/docs/en/sql-reference/data-types/int-uint.md)). @@ -1596,13 +1635,13 @@ wordShingleMinHashArg(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. +- Tuple with two tuples with `hashnum` word shingles each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). @@ -1634,13 +1673,13 @@ wordShingleMinHashArgCaseInsensitive(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. +- Tuple with two tuples with `hashnum` word shingles each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). @@ -1672,13 +1711,13 @@ wordShingleMinHashArgUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). 
-- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. +- Tuple with two tuples with `hashnum` word shingles each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). @@ -1710,13 +1749,13 @@ wordShingleMinHashArgCaseInsensitiveUTF8(string[, shinglesize, hashnum]) **Arguments** -- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). -- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). +- `shinglesize` — The size of a word shingle. Optional. Possible values: any number from `1` to `25`. Default value: `3`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `hashnum` — The number of minimum and maximum hashes used to calculate the result. Optional. Possible values: any number from `1` to `25`. Default value: `6`. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). **Returned value** -- Tuple with two tuples with `hashnum` word shingles each. +- Tuple with two tuples with `hashnum` word shingles each. Type: [Tuple](/docs/en/sql-reference/data-types/tuple.md)([Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md)), [Tuple](/docs/en/sql-reference/data-types/tuple.md)([String](/docs/en/sql-reference/data-types/string.md))). diff --git a/docs/en/sql-reference/functions/in-functions.md b/docs/en/sql-reference/functions/in-functions.md index 185672227da..193c54cea44 100644 --- a/docs/en/sql-reference/functions/in-functions.md +++ b/docs/en/sql-reference/functions/in-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/in-functions -sidebar_position: 60 +sidebar_position: 90 sidebar_label: IN Operator --- diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index 01da5f01679..1577c01eec9 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/functions/ -sidebar_position: 32 -sidebar_label: Functions +sidebar_position: 1 +sidebar_label: Overview --- -# Functions +# Regular Functions There are at least\* two types of functions - regular functions (they are just called “functions”) and aggregate functions. 
These are completely different concepts. Regular functions work as if they are applied to each row separately (for each row, the result of the function does not depend on the other rows). Aggregate functions accumulate a set of values from various rows (i.e. they depend on the entire set of rows). @@ -37,8 +37,8 @@ Functions can be implemented in different ways for constant and non-constant arg Functions have the following behaviors: -- If at least one of the arguments of the function is `NULL`, the function result is also `NULL`. -- Special behavior that is specified individually in the description of each function. In the ClickHouse source code, these functions have `UseDefaultImplementationForNulls=false`. +- If at least one of the arguments of the function is `NULL`, the function result is also `NULL`. +- Special behavior that is specified individually in the description of each function. In the ClickHouse source code, these functions have `UseDefaultImplementationForNulls=false`. ## Constancy @@ -70,21 +70,21 @@ The configuration of executable user defined functions can be located in one or A function configuration contains the following settings: -- `name` - a function name. -- `command` - script name to execute or command if `execute_direct` is false. -- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number. -- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command. -- `return_type` - the type of a returned value. -- `return_name` - name of retuned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`. -- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created. -- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`. -- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`. -- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter. -- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter. -- `pool_size` - the size of a command pool. Optional. Default value is `16`. -- `send_chunk_header` - controls whether to send row count before sending a chunk of data to process. Optional. Default value is `false`. -- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using whitespace separator. Example: `script_name arg1 arg2`. 
If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
-- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter.
+- `name` - a function name.
+- `command` - script name to execute or command if `execute_direct` is false.
+- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
+- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
+- `return_type` - the type of a returned value.
+- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
+- `type` - an executable type. If `type` is set to `executable` then a single command is started. If it is set to `executable_pool` then a pool of commands is created.
+- `max_command_execution_time` - maximum execution time in seconds for processing a block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
+- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.
+- `command_read_timeout` - timeout for reading data from command stdout in milliseconds. Default value 10000. Optional parameter.
+- `command_write_timeout` - timeout for writing data to command stdin in milliseconds. Default value 10000. Optional parameter.
+- `pool_size` - the size of a command pool. Optional. Default value is `16`.
+- `send_chunk_header` - controls whether to send a row count before sending a chunk of data to process. Optional. Default value is `false`.
+- `execute_direct` - If `execute_direct` = `1`, then `command` will be searched inside the user_scripts folder specified by [user_scripts_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_scripts_path). Additional script arguments can be specified using a whitespace separator. Example: `script_name arg1 arg2`. If `execute_direct` = `0`, `command` is passed as argument for `bin/sh -c`. Default value is `1`. Optional parameter.
+- `lifetime` - the reload interval of a function in seconds. If it is set to `0` then the function is not reloaded. Default value is `0`. Optional parameter.
The command must read arguments from `STDIN` and must output the result to `STDOUT`. The command must process arguments iteratively. That is after processing a chunk of arguments it must wait for the next chunk.
@@ -287,8 +287,8 @@ For distributed query processing, as many stages of query processing as possible
This means that functions can be performed on different servers.
For example, in the query `SELECT f(sum(g(x))) FROM distributed_table GROUP BY h(y),`
-- if a `distributed_table` has at least two shards, the functions ‘g’ and ‘h’ are performed on remote servers, and the function ‘f’ is performed on the requestor server.
-- if a `distributed_table` has only one shard, all the ‘f’, ‘g’, and ‘h’ functions are performed on this shard’s server.
+- if a `distributed_table` has at least two shards, the functions ‘g’ and ‘h’ are performed on remote servers, and the function ‘f’ is performed on the requestor server.
+- if a `distributed_table` has only one shard, all the ‘f’, ‘g’, and ‘h’ functions are performed on this shard’s server.
The result of a function usually does not depend on which server it is performed on. However, sometimes this is important. For example, functions that work with dictionaries use the dictionary that exists on the server they are running on.
diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md
index c01abd6ed89..8cb35483555 100644
--- a/docs/en/sql-reference/functions/introspection.md
+++ b/docs/en/sql-reference/functions/introspection.md
@@ -1,6 +1,6 @@
---
slug: /en/sql-reference/functions/introspection
-sidebar_position: 65
+sidebar_position: 100
sidebar_label: Introspection
---
@@ -14,9 +14,9 @@ These functions are slow and may impose security considerations.
For proper operation of introspection functions:
-- Install the `clickhouse-common-static-dbg` package.
+- Install the `clickhouse-common-static-dbg` package.
-- Set the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting to 1.
+- Set the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting to 1.
For security reasons introspection functions are disabled by default.
@@ -36,17 +36,17 @@ addressToLine(address_of_binary_instruction)
**Arguments**
-- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process.
+- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process.
**Returned value**
-- Source code filename and the line number in this file delimited by colon.
+- Source code filename and the line number in this file delimited by colon.
For example, `/build/obj-x86_64-linux-gnu/../src/Common/ThreadPool.cpp:199`, where `199` is a line number.
-- Name of a binary, if the function couldn’t find the debug information.
+- Name of a binary, if the function couldn’t find the debug information.
-- Empty string, if the address is not valid.
+- Empty string, if the address is not valid.
Type: [String](../../sql-reference/data-types/string.md).
@@ -129,15 +129,15 @@ addressToLineWithInlines(address_of_binary_instruction)
**Arguments**
-- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process.
+- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process.
**Returned value**
-- Array which first element is source code filename and the line number in this file delimited by colon. And from second element, inline functions' source code filename and line number and function name are listed.
+- Array whose first element is the source code filename and the line number in this file, delimited by a colon. From the second element on, inline functions' source code filenames, line numbers, and function names are listed.
-- Array with single element which is name of a binary, if the function couldn’t find the debug information.
+- Array with a single element containing the name of the binary, if the function couldn’t find the debug information.
-- Empty array, if the address is not valid.
+- Empty array, if the address is not valid.
Type: [Array(String)](../../sql-reference/data-types/array.md).
@@ -232,12 +232,12 @@ addressToSymbol(address_of_binary_instruction)
**Arguments**
-- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process.
+- `address_of_binary_instruction` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Address of instruction in a running process.
**Returned value**
-- Symbol from ClickHouse object files.
-- Empty string, if the address is not valid.
+- Symbol from ClickHouse object files.
+- Empty string, if the address is not valid.
Type: [String](../../sql-reference/data-types/string.md).
@@ -329,12 +329,12 @@ demangle(symbol)
**Arguments**
-- `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol from an object file.
+- `symbol` ([String](../../sql-reference/data-types/string.md)) — Symbol from an object file.
**Returned value**
-- Name of the C++ function.
-- Empty string if a symbol is not valid.
+- Name of the C++ function.
+- Empty string if a symbol is not valid.
Type: [String](../../sql-reference/data-types/string.md).
@@ -425,7 +425,7 @@ tid()
**Returned value**
-- Current thread id. [Uint64](../../sql-reference/data-types/int-uint.md#uint-ranges).
+- Current thread id. [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges).
**Example**
@@ -455,11 +455,11 @@ logTrace('message')
**Arguments**
-- `message` — Message that is emitted to server log. [String](../../sql-reference/data-types/string.md#string).
+- `message` — Message that is emitted to the server log. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
-- Always returns 0.
+- Always returns 0.
**Example**
diff --git a/docs/en/sql-reference/functions/ip-address-functions.md b/docs/en/sql-reference/functions/ip-address-functions.md
index 47058a28d12..0dc1db1161b 100644
--- a/docs/en/sql-reference/functions/ip-address-functions.md
+++ b/docs/en/sql-reference/functions/ip-address-functions.md
@@ -1,6 +1,6 @@
---
slug: /en/sql-reference/functions/ip-address-functions
-sidebar_position: 55
+sidebar_position: 95
sidebar_label: IP Addresses
---
@@ -147,11 +147,11 @@ IPv6StringToNum(string)
**Argument**
-- `string` — IP address. [String](../../sql-reference/data-types/string.md).
+- `string` — IP address. [String](../../sql-reference/data-types/string.md).
**Returned value**
-- IPv6 address in binary format.
+- IPv6 address in binary format.
Type: [FixedString(16)](../../sql-reference/data-types/fixedstring.md).
@@ -175,7 +175,7 @@ Result:
**See Also**
-- [cutIPv6](#cutipv6x-bytestocutforipv6-bytestocutforipv4).
+- [cutIPv6](#cutipv6x-bytestocutforipv6-bytestocutforipv4).
## IPv6StringToNumOrDefault(s)
@@ -309,11 +309,11 @@ toIPv6(string)
**Argument**
-- `string` — IP address. [String](../../sql-reference/data-types/string.md)
+- `string` — IP address. [String](../../sql-reference/data-types/string.md)
**Returned value**
-- IP address.
+- IP address.
Type: [IPv6](../../sql-reference/data-types/domains/ipv6.md).
@@ -370,11 +370,11 @@ isIPv4String(string)
**Arguments**
-- `string` — IP address.
[String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../../sql-reference/data-types/string.md). **Returned value** -- `1` if `string` is IPv4 address, `0` otherwise. +- `1` if `string` is IPv4 address, `0` otherwise. Type: [UInt8](../../sql-reference/data-types/int-uint.md). @@ -408,11 +408,11 @@ isIPv6String(string) **Arguments** -- `string` — IP address. [String](../../sql-reference/data-types/string.md). +- `string` — IP address. [String](../../sql-reference/data-types/string.md). **Returned value** -- `1` if `string` is IPv6 address, `0` otherwise. +- `1` if `string` is IPv6 address, `0` otherwise. Type: [UInt8](../../sql-reference/data-types/int-uint.md). @@ -449,12 +449,12 @@ This function accepts both IPv4 and IPv6 addresses (and networks) represented as **Arguments** -- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md). -- `prefix` — An IPv4 or IPv6 network prefix in CIDR. [String](../../sql-reference/data-types/string.md). +- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md). +- `prefix` — An IPv4 or IPv6 network prefix in CIDR. [String](../../sql-reference/data-types/string.md). **Returned value** -- `1` or `0`. +- `1` or `0`. Type: [UInt8](../../sql-reference/data-types/int-uint.md). @@ -516,11 +516,11 @@ This function performs reverse DNS resolutions on both IPv4 and IPv6. **Arguments** -- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md). +- `address` — An IPv4 or IPv6 address. [String](../../sql-reference/data-types/string.md). **Returned value** -- Associated domains (PTR records). +- Associated domains (PTR records). Type: Type: [Array(String)](../../sql-reference/data-types/array.md). diff --git a/docs/en/sql-reference/functions/json-functions.md b/docs/en/sql-reference/functions/json-functions.md index 81697f901c1..31d53ba0359 100644 --- a/docs/en/sql-reference/functions/json-functions.md +++ b/docs/en/sql-reference/functions/json-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/json-functions -sidebar_position: 56 +sidebar_position: 105 sidebar_label: JSON --- @@ -109,9 +109,9 @@ SELECT JSONHas('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4) = 0 `indices_or_keys` is a list of zero or more arguments each of them can be either string or integer. -- String = access object member by key. -- Positive integer = access the n-th member/key from the beginning. -- Negative integer = access the n-th member/key from the end. +- String = access object member by key. +- Positive integer = access the n-th member/key from the beginning. +- Negative integer = access the n-th member/key from the end. Minimum index of the element is 1. Thus the element 0 does not exist. 
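The `indices_or_keys` path arguments above can be illustrated with a quick query (an editorial sketch, not part of the patch; it uses `JSONExtractString`, which follows the same path semantics):

```sql
-- access by key, by position from the start, and by position from the end
SELECT
    JSONExtractString('{"a": "hello", "b": "world"}', 'a'),  -- 'hello' (by key)
    JSONExtractString('{"a": "hello", "b": "world"}', 1),    -- 'hello' (first member)
    JSONExtractString('{"a": "hello", "b": "world"}', -1);   -- 'world' (last member)
```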
@@ -206,6 +206,7 @@ Examples: ``` sql SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(String, Array(Float64))') = ('hello',[-100,200,300]) SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'Tuple(b Array(Float64), a String)') = ([-100,200,300],'hello') +SELECT JSONExtract('{"a": "hello", "b": "world"}', 'Map(String, String)') = map('a', 'hello', 'b', 'world'); SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 'Array(Nullable(Int8))') = [-100, NULL, NULL] SELECT JSONExtract('{"a": "hello", "b": [-100, 200.0, 300]}', 'b', 4, 'Nullable(Int64)') = NULL SELECT JSONExtract('{"passed": true}', 'passed', 'UInt8') = 1 @@ -235,8 +236,8 @@ JSONExtractKeys(json[, a, b, c...]) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. -- `a, b, c...` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [String](../../sql-reference/data-types/string.md) to get the field by the key or an [Integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. +- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. +- `a, b, c...` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [String](../../sql-reference/data-types/string.md) to get the field by the key or an [Integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. **Returned value** @@ -297,13 +298,13 @@ JSONExtractKeysAndValuesRaw(json[, p, a, t, h]) **Arguments** -- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. -- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. +- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON. +- `p, a, t, h` — Comma-separated indices or keys that specify the path to the inner field in a nested JSON object. Each argument can be either a [string](../../sql-reference/data-types/string.md) to get the field by the key or an [integer](../../sql-reference/data-types/int-uint.md) to get the N-th field (indexed from 1, negative integers count from the end). If not set, the whole JSON is parsed as the top-level object. Optional parameter. **Returned values** -- Array with `('key', 'value')` tuples. Both tuple members are strings. -- Empty array if the requested object does not exist, or input JSON is invalid. +- Array with `('key', 'value')` tuples. Both tuple members are strings. +- Empty array if the requested object does not exist, or input JSON is invalid. Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), [String](../../sql-reference/data-types/string.md)). 
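To make the tuple-of-raw-strings shape above concrete, a sketch (values come back as unparsed JSON fragments, so numbers keep their original formatting and strings keep their quotes):

```sql
SELECT JSONExtractKeysAndValuesRaw('{"a": [-100, 200.0], "b": "hello"}');
-- [('a','[-100, 200.0]'), ('b','"hello"')]
```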
@@ -442,11 +443,11 @@ toJSONString(value)
**Arguments**
-- `value` — Value to serialize. Value may be of any data type.
+- `value` — Value to serialize. Value may be of any data type.
**Returned value**
-- JSON representation of the value.
+- JSON representation of the value.
Type: [String](../../sql-reference/data-types/string.md).
@@ -471,8 +472,8 @@ Result:
**See Also**
-- [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers)
-- [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals)
+- [output_format_json_quote_64bit_integers](../../operations/settings/settings.md#session_settings-output_format_json_quote_64bit_integers)
+- [output_format_json_quote_denormals](../../operations/settings/settings.md#settings-output_format_json_quote_denormals)
## JSONArrayLength
@@ -489,11 +490,11 @@ Alias: `JSON_ARRAY_LENGTH(json)`.
**Arguments**
-- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON.
+- `json` — [String](../../sql-reference/data-types/string.md) with valid JSON.
**Returned value**
-- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL.
+- If `json` is a valid JSON array string, returns the number of array elements, otherwise returns NULL.
Type: [Nullable(UInt64)](../../sql-reference/data-types/int-uint.md).
diff --git a/docs/en/sql-reference/functions/logical-functions.md b/docs/en/sql-reference/functions/logical-functions.md
index 137753d12c9..1c45994605a 100644
--- a/docs/en/sql-reference/functions/logical-functions.md
+++ b/docs/en/sql-reference/functions/logical-functions.md
@@ -1,18 +1,18 @@
---
slug: /en/sql-reference/functions/logical-functions
-sidebar_position: 37
+sidebar_position: 110
sidebar_label: Logical
---
# Logical Functions
-Performs logical operations on arguments of any numeric types, but returns a [UInt8](../../sql-reference/data-types/int-uint.md) number equal to 0, 1 or `NULL` in some cases.
+The functions below perform logical operations on arguments of arbitrary numeric types. They return either 0 or 1 as [UInt8](../../sql-reference/data-types/int-uint.md), or in some cases `NULL`.
-Zero as an argument is considered `false`, while any non-zero value is considered `true`.
+Zero as an argument is considered `false`; non-zero values are considered `true`.
## and
-Calculates the result of the logical conjunction between two or more values. Corresponds to [Logical AND Operator](../../sql-reference/operators/index.md#logical-and-operator).
+Calculates the logical conjunction between two or more values.
**Syntax**
@@ -20,24 +20,24 @@ Calculates the result of the logical conjunction between two or more values. Cor
and(val1, val2...)
```
-You can use the [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) setting to calculate the `and` function according to a short scheme. If this setting is enabled, `vali` is evaluated only on rows where `(val1 AND val2 AND ... AND val{i-1})` is true. For example, an exception about division by zero is not thrown when executing the query `SELECT and(number = 2, intDiv(1, number)) FROM numbers(10)`.
+Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `(val_1 AND val_2 AND ... AND val_{i-1})` is `true`.
For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT and(number = 2, intDiv(1, number)) FROM numbers(5)`.
+
+Alias: The [AND Operator](../../sql-reference/operators/index.md#logical-and-operator).
**Arguments**
-- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).
+- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).
**Returned value**
-- `0`, if there is at least one zero value argument.
-- `NULL`, if there are no zero values arguments and there is at least one `NULL` argument.
-- `1`, otherwise.
+- `0`, if at least one argument evaluates to `false`,
+- `NULL`, if no argument evaluates to `false` and at least one argument is `NULL`,
+- `1`, otherwise.
Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
**Example**
-Query:
-
``` sql
SELECT and(0, 1, -2);
```
@@ -66,7 +66,7 @@ Result:
## or
-Calculates the result of the logical disjunction between two or more values. Corresponds to [Logical OR Operator](../../sql-reference/operators/index.md#logical-or-operator).
+Calculates the logical disjunction between two or more values.
**Syntax**
@@ -74,24 +74,24 @@ Calculates the result of the logical disjunction between two or more values. Cor
or(val1, val2...)
```
-You can use the [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) setting to calculate the `or` function according to a short scheme. If this setting is enabled, `vali` is evaluated only on rows where `((NOT val1) AND (NOT val2) AND ... AND (NOT val{i-1}))` is true. For example, an exception about division by zero is not thrown when executing the query `SELECT or(number = 0, intDiv(1, number) != 0) FROM numbers(10)`.
+Setting [short_circuit_function_evaluation](../../operations/settings/settings.md#short-circuit-function-evaluation) controls whether short-circuit evaluation is used. If enabled, `val_i` is evaluated only if `((NOT val_1) AND (NOT val_2) AND ... AND (NOT val_{i-1}))` is `true`. For example, with short-circuit evaluation, no division-by-zero exception is thrown when executing the query `SELECT or(number = 0, intDiv(1, number) != 0) FROM numbers(5)`.
+
+Alias: The [OR Operator](../../sql-reference/operators/index.md#logical-or-operator).
**Arguments**
-- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).
+- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).
**Returned value**
-- `1`, if there is at least one non-zero value.
-- `0`, if there are only zero values.
-- `NULL`, if there are only zero values and `NULL`.
+- `1`, if at least one argument evaluates to `true`,
+- `0`, if all arguments evaluate to `false`,
+- `NULL`, if all arguments evaluate to `false` and at least one argument is `NULL`.
Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
**Example**
-Query:
-
``` sql
SELECT or(1, 0, 0, 2, NULL);
```
@@ -120,7 +120,7 @@ Result:
## not
-Calculates the result of the logical negation of the value. Corresponds to [Logical Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator).
+Calculates the logical negation of a value.
**Syntax**
@@ -128,22 +128,22 @@ Calculates the result of the logical negation of the value. Corresponds to [Logi
not(val);
```
+Alias: The [Negation Operator](../../sql-reference/operators/index.md#logical-negation-operator).
+
**Arguments**
-- `val` — The value. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).
+- `val` — The value. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).
**Returned value**
-- `1`, if the `val` is `0`.
-- `0`, if the `val` is a non-zero value.
-- `NULL`, if the `val` is a `NULL` value.
+- `1`, if `val` evaluates to `false`,
+- `0`, if `val` evaluates to `true`,
+- `NULL`, if `val` is `NULL`.
Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
**Example**
-Query:
-
``` sql
SELECT NOT(1);
```
@@ -158,7 +158,7 @@ Result:
## xor
-Calculates the result of the logical exclusive disjunction between two or more values. For more than two values the function works as if it calculates `XOR` of the first two values and then uses the result with the next value to calculate `XOR` and so on.
+Calculates the logical exclusive disjunction between two or more values. For more than two values the function first XORs the first two values, then XORs the result with the third value, and so on.
**Syntax**
@@ -168,20 +168,18 @@ xor(val1, val2...)
**Arguments**
-- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).
+- `val1, val2, ...` — List of at least two values. [Int](../../sql-reference/data-types/int-uint.md), [UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Nullable](../../sql-reference/data-types/nullable.md).
**Returned value**
-- `1`, for two values: if one of the values is zero and other is not.
-- `0`, for two values: if both values are zero or non-zero at the same time.
-- `NULL`, if there is at least one `NULL` value.
+- `1`, for two values: if one of the values evaluates to `false` and the other does not,
+- `0`, for two values: if both values evaluate to `false` or both to `true`,
+- `NULL`, if at least one of the inputs is `NULL`.
Type: [UInt8](../../sql-reference/data-types/int-uint.md) or [Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md)).
**Example**
-Query:
-
``` sql
SELECT xor(0, 1, 1);
```
diff --git a/docs/en/sql-reference/functions/machine-learning-functions.md b/docs/en/sql-reference/functions/machine-learning-functions.md
index 98408ef459c..44ce4dcd211 100644
--- a/docs/en/sql-reference/functions/machine-learning-functions.md
+++ b/docs/en/sql-reference/functions/machine-learning-functions.md
@@ -1,6 +1,6 @@
---
slug: /en/sql-reference/functions/machine-learning-functions
-sidebar_position: 64
+sidebar_position: 115
sidebar_label: Machine Learning
---
@@ -16,4 +16,4 @@ The [stochasticLinearRegression](../../sql-reference/aggregate-functions/referen
## stochasticLogisticRegression
-The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) aggregate function implements stochastic gradient descent method for binary classification problem. Uses `evalMLMethod` to predict on new data.
\ No newline at end of file
+The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) aggregate function implements stochastic gradient descent method for binary classification problem. Uses `evalMLMethod` to predict on new data.
diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md
index 10bc73c4a72..9851378d4fd 100644
--- a/docs/en/sql-reference/functions/math-functions.md
+++ b/docs/en/sql-reference/functions/math-functions.md
@@ -1,120 +1,260 @@
---
slug: /en/sql-reference/functions/math-functions
-sidebar_position: 44
+sidebar_position: 125
sidebar_label: Mathematical
---
# Mathematical Functions
-All the functions return a Float64 number. The accuracy of the result is close to the maximum precision possible, but the result might not coincide with the machine representable number nearest to the corresponding real number.
+All the functions return a Float64 number. Results are generally as close to the actual result as possible, but in some cases the result is not the machine-representable number nearest to the true value.
-## e()
+## e
-Returns a Float64 number that is close to the number e.
+Returns e.
-## pi()
+**Syntax**
-Returns a Float64 number that is close to the number π.
+```sql
+e()
+```
-## exp(x)
+## pi
-Accepts a numeric argument and returns a Float64 number close to the exponent of the argument.
+Returns π.
-## log(x), ln(x)
+**Syntax**
-Accepts a numeric argument and returns a Float64 number close to the natural logarithm of the argument.
+```sql
+pi()
+```
-## exp2(x)
+## exp
-Accepts a numeric argument and returns a Float64 number close to 2 to the power of x.
+Returns e to the power of the given argument.
-## log2(x)
+**Syntax**
-Accepts a numeric argument and returns a Float64 number close to the binary logarithm of the argument.
+```sql
+exp(x)
+```
-## exp10(x)
+## log
-Accepts a numeric argument and returns a Float64 number close to 10 to the power of x.
+Returns the natural logarithm of the argument.
-## log10(x)
+**Syntax**
-Accepts a numeric argument and returns a Float64 number close to the decimal logarithm of the argument.
+```sql
+log(x)
+```
-## sqrt(x)
+Alias: `ln(x)`
-Accepts a numeric argument and returns a Float64 number close to the square root of the argument.
+## exp2
-## cbrt(x)
+Returns 2 to the power of the given argument.
-Accepts a numeric argument and returns a Float64 number close to the cubic root of the argument.
+**Syntax**
-## erf(x)
+```sql
+exp2(x)
+```
-If ‘x’ is non-negative, then `erf(x / σ√2)` is the probability that a random variable having a normal distribution with standard deviation ‘σ’ takes the value that is separated from the expected value by more than ‘x’.
+## intExp2
-Example (three sigma rule):
+Like `exp2` but returns a UInt64.
+
+**Syntax**
+
+```sql
+intExp2(x)
+```
+
+## log2
+
+Returns the binary logarithm of the argument.
+
+**Syntax**
+
+```sql
+log2(x)
+```
+
+## exp10
+
+Returns 10 to the power of the given argument.
+
+**Syntax**
+
+```sql
+exp10(x)
+```
+
+## intExp10
+
+Like `exp10` but returns a UInt64.
+
+**Syntax**
+
+```sql
+intExp10(x)
+```
+
+## log10
+
+Returns the decimal logarithm of the argument.
+
+**Syntax**
+
+```sql
+log10(x)
+```
+
+## sqrt
+
+Returns the square root of the argument.
+
+```sql
+sqrt(x)
+```
+
+## cbrt
+
+Returns the cubic root of the argument.
+
+```sql
+cbrt(x)
+```
+
+## erf
+
+If `x` is non-negative, then `erf(x / σ√2)` is the probability that a random variable having a normal distribution with standard deviation `σ` takes the value that is separated from the expected value by more than `x`.
+
+**Syntax**
+
+```sql
+erf(x)
+```
+
+**Example**
+
+(three sigma rule)
``` sql
SELECT erf(3 / sqrt(2));
```
-``` text
+```result
┌─erf(divide(3, sqrt(2)))─┐
│ 0.9973002039367398 │
└─────────────────────────┘
```
-## erfc(x)
+## erfc
-Accepts a numeric argument and returns a Float64 number close to 1 - erf(x), but without loss of precision for large ‘x’ values.
+Returns a number close to `1 - erf(x)` without loss of precision for large `x` values.
-## lgamma(x)
+**Syntax**
-The logarithm of the gamma function.
+```sql
+erfc(x)
+```
-## tgamma(x)
+## lgamma
-Gamma function.
+Returns the logarithm of the gamma function.
-## sin(x)
+**Syntax**
-The sine.
+```sql
+lgamma(x)
+```
-## cos(x)
+## tgamma
-The cosine.
+Returns the gamma function.
-## tan(x)
+**Syntax**
-The tangent.
+```sql
+tgamma(x)
+```
-## asin(x)
+## sin
-The arc sine.
+Returns the sine of the argument.
-## acos(x)
+**Syntax**
-The arc cosine.
+```sql
+sin(x)
+```
-## atan(x)
+## cos
-The arc tangent.
+Returns the cosine of the argument.
-## pow(x, y), power(x, y)
+**Syntax**
-Takes two numeric arguments x and y. Returns a Float64 number close to x to the power of y.
+```sql
+cos(x)
+```
-## intExp2
+## tan
-Accepts a numeric argument and returns a UInt64 number close to 2 to the power of x.
+Returns the tangent of the argument.
-## intExp10
+**Syntax**
-Accepts a numeric argument and returns a UInt64 number close to 10 to the power of x.
+```sql
+tan(x)
+```
-## cosh(x)
+## asin
-[Hyperbolic cosine](https://in.mathworks.com/help/matlab/ref/cosh.html).
+Returns the arc sine of the argument.
+
+**Syntax**
+
+```sql
+asin(x)
+```
+
+## acos
+
+Returns the arc cosine of the argument.
+
+**Syntax**
+
+```sql
+acos(x)
+```
+
+## atan
+
+Returns the arc tangent of the argument.
+
+**Syntax**
+
+```sql
+atan(x)
+```
+
+## pow
+
+Returns `x` to the power of `y`.
+
+**Syntax**
+
+```sql
+pow(x, y)
+```
+
+Alias: `power(x, y)`
+
+## cosh
+
+Returns the [hyperbolic cosine](https://in.mathworks.com/help/matlab/ref/cosh.html) of the argument.
**Syntax**
@@ -124,33 +264,31 @@ cosh(x)
**Arguments**
-- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
+- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64).
**Returned value** -- Values from the interval: `1 <= cosh(x) < +∞`. +- Values from the interval: `1 <= cosh(x) < +∞`. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT cosh(0); ``` Result: -``` text +```result ┌─cosh(0)──┐ │ 1 │ └──────────┘ ``` -## acosh(x) +## acosh -[Inverse hyperbolic cosine](https://www.mathworks.com/help/matlab/ref/acosh.html). +Returns the [inverse hyperbolic cosine](https://www.mathworks.com/help/matlab/ref/acosh.html). **Syntax** @@ -160,37 +298,31 @@ acosh(x) **Arguments** -- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — Hyperbolic cosine of angle. Values from the interval: `1 <= x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- The angle, in radians. Values from the interval: `0 <= acosh(x) < +∞`. +- The angle, in radians. Values from the interval: `0 <= acosh(x) < +∞`. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT acosh(1); ``` Result: -``` text +```result ┌─acosh(1)─┐ │ 0 │ └──────────┘ ``` -**See Also** +## sinh -- [cosh(x)](../../sql-reference/functions/math-functions.md#coshx) - -## sinh(x) - -[Hyperbolic sine](https://www.mathworks.com/help/matlab/ref/sinh.html). +Returns the [hyperbolic sine](https://www.mathworks.com/help/matlab/ref/sinh.html). **Syntax** @@ -200,33 +332,31 @@ sinh(x) **Arguments** -- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — The angle, in radians. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- Values from the interval: `-∞ < sinh(x) < +∞`. +- Values from the interval: `-∞ < sinh(x) < +∞`. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT sinh(0); ``` Result: -``` text +```result ┌─sinh(0)──┐ │ 0 │ └──────────┘ ``` -## asinh(x) +## asinh -[Inverse hyperbolic sine](https://www.mathworks.com/help/matlab/ref/asinh.html). +Returns the [inverse hyperbolic sine](https://www.mathworks.com/help/matlab/ref/asinh.html). **Syntax** @@ -236,37 +366,31 @@ asinh(x) **Arguments** -- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — Hyperbolic sine of angle. Values from the interval: `-∞ < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- The angle, in radians. Values from the interval: `-∞ < asinh(x) < +∞`. +- The angle, in radians. Values from the interval: `-∞ < asinh(x) < +∞`. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT asinh(0); ``` Result: -``` text +```result ┌─asinh(0)─┐ │ 0 │ └──────────┘ ``` -**See Also** +## atanh -- [sinh(x)](../../sql-reference/functions/math-functions.md#sinhx) - -## atanh(x) - -[Inverse hyperbolic tangent](https://www.mathworks.com/help/matlab/ref/atanh.html). +Returns the [inverse hyperbolic tangent](https://www.mathworks.com/help/matlab/ref/atanh.html). **Syntax** @@ -276,33 +400,31 @@ atanh(x) **Arguments** -- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [Float64](../../sql-reference/data-types/float.md#float32-float64). 
+- `x` — Hyperbolic tangent of angle. Values from the interval: `–1 < x < 1`. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- The angle, in radians. Values from the interval: `-∞ < atanh(x) < +∞`. +- The angle, in radians. Values from the interval: `-∞ < atanh(x) < +∞`. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT atanh(0); ``` Result: -``` text +```result ┌─atanh(0)─┐ │ 0 │ └──────────┘ ``` -## atan2(y, x) +## atan2 -The [function](https://en.wikipedia.org/wiki/Atan2) calculates the angle in the Euclidean plane, given in radians, between the positive x axis and the ray to the point `(x, y) ≠ (0, 0)`. +Returns the [atan2](https://en.wikipedia.org/wiki/Atan2) as the angle in the Euclidean plane, given in radians, between the positive x axis and the ray to the point `(x, y) ≠ (0, 0)`. **Syntax** @@ -312,34 +434,32 @@ atan2(y, x) **Arguments** -- `y` — y-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). -- `x` — x-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `y` — y-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — x-coordinate of the point through which the ray passes. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- The angle `θ` such that `−π < θ ≤ π`, in radians. +- The angle `θ` such that `−π < θ ≤ π`, in radians. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT atan2(1, 1); ``` Result: -``` text +```result ┌────────atan2(1, 1)─┐ │ 0.7853981633974483 │ └────────────────────┘ ``` -## hypot(x, y) +## hypot -Calculates the length of the hypotenuse of a right-angle triangle. The [function](https://en.wikipedia.org/wiki/Hypot) avoids problems that occur when squaring very large or very small numbers. +Returns the length of the hypotenuse of a right-angle triangle. [Hypot](https://en.wikipedia.org/wiki/Hypot) avoids problems that occur when squaring very large or very small numbers. **Syntax** @@ -349,34 +469,32 @@ hypot(x, y) **Arguments** -- `x` — The first cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). -- `y` — The second cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — The first cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `y` — The second cathetus of a right-angle triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- The length of the hypotenuse of a right-angle triangle. +- The length of the hypotenuse of a right-angle triangle. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT hypot(1, 1); ``` Result: -``` text +```result ┌────────hypot(1, 1)─┐ │ 1.4142135623730951 │ └────────────────────┘ ``` -## log1p(x) +## log1p -Calculates `log(1+x)`. The [function](https://en.wikipedia.org/wiki/Natural_logarithm#lnp1) `log1p(x)` is more accurate than `log(1+x)` for small values of x. +Calculates `log(1+x)`. The [calculation](https://en.wikipedia.org/wiki/Natural_logarithm#lnp1) `log1p(x)` is more accurate than `log(1+x)` for small values of x. 
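To see why `log1p` exists, a small illustrative comparison with the naive form for a tiny `x`, where `1 + x` already rounds to `1.0` in Float64:

```sql
SELECT log1p(1e-16) AS accurate, log(1 + 1e-16) AS lossy;
-- accurate ≈ 1e-16, lossy = 0 (1 + 1e-16 rounds to 1.0 before log is applied)
```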
**Syntax** @@ -386,35 +504,29 @@ log1p(x) **Arguments** -- `x` — Values from the interval: `-1 < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — Values from the interval: `-1 < x < +∞`. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- Values from the interval: `-∞ < log1p(x) < +∞`. +- Values from the interval: `-∞ < log1p(x) < +∞`. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT log1p(0); ``` Result: -``` text +```result ┌─log1p(0)─┐ │ 0 │ └──────────┘ ``` -**See Also** - -- [log(x)](../../sql-reference/functions/math-functions.md#logx-lnx) - -## sign(x) +## sign Returns the sign of a real number. @@ -426,13 +538,13 @@ sign(x) **Arguments** -- `x` — Values from `-∞` to `+∞`. Support all numeric types in ClickHouse. +- `x` — Values from `-∞` to `+∞`. Support all numeric types in ClickHouse. **Returned value** - -1 for `x < 0` -- 0 for `x = 0` -- 1 for `x > 0` +- 0 for `x = 0` +- 1 for `x > 0` **Examples** @@ -444,7 +556,7 @@ SELECT sign(0); Result: -``` text +```result ┌─sign(0)─┐ │ 0 │ └─────────┘ @@ -458,7 +570,7 @@ SELECT sign(1); Result: -``` text +```result ┌─sign(1)─┐ │ 1 │ └─────────┘ @@ -472,15 +584,15 @@ SELECT sign(-1); Result: -``` text +```result ┌─sign(-1)─┐ │ -1 │ └──────────┘ ``` -## degrees(x) +## degrees -Converts the input value in radians to degrees. +Converts radians to degrees. **Syntax** @@ -490,33 +602,31 @@ degrees(x) **Arguments** -- `x` — Input in radians. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — Input in radians. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- Value in degrees. +- Value in degrees. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT degrees(3.141592653589793); ``` Result: -``` text +```result ┌─degrees(3.141592653589793)─┐ │ 180 │ └────────────────────────────┘ ``` -## radians(x) +## radians -Converts the input value in degrees to radians. +Converts degrees to radians. **Syntax** @@ -526,34 +636,31 @@ radians(x) **Arguments** -- `x` — Input in degrees. [Float64](../../sql-reference/data-types/float.md#float32-float64). +- `x` — Input in degrees. [Float64](../../sql-reference/data-types/float.md#float32-float64). **Returned value** -- Value in radians. +- Value in radians. Type: [Float64](../../sql-reference/data-types/float.md#float32-float64). **Example** -Query: - ``` sql SELECT radians(180); ``` Result: -``` text +```result ┌──────radians(180)─┐ │ 3.141592653589793 │ └───────────────────┘ ``` +## factorial -## factorial(n) - -Computes the factorial of an integer value. It works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is UInt64. +Computes the factorial of an integer value. Works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is UInt64. The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20, a value of 21 or greater will cause exception throw. 
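A quick, illustrative check of the edge cases just described (0 and negative inputs return 1, and 20 is the largest accepted argument):

```sql
SELECT factorial(0) AS f0, factorial(-5) AS fneg, factorial(20) AS f20;
-- 1, 1, 2432902008176640000; factorial(21) would throw an exception
```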
@@ -566,21 +673,19 @@ factorial(n)
**Example**
-Query:
-
``` sql
SELECT factorial(10);
```
Result:
-``` text
+```result
┌─factorial(10)─┐
│ 3628800 │
└───────────────┘
```
-## width_bucket(operand, low, high, count)
+## width_bucket
Returns the number of the bucket in which `operand` falls in a histogram having `count` equal-width buckets spanning the range `low` to `high`. Returns `0` if `operand < low`, and returns `count+1` if `operand >= high`.
@@ -591,21 +696,18 @@ Returns the number of the bucket in which `operand` falls in a histogram having
```sql
widthBucket(operand, low, high, count)
```
-
-There is also a case insensitive alias called `WIDTH_BUCKET` to provide compatibility with other databases.
+Alias: `WIDTH_BUCKET`
**Example**
-Query:
-
``` sql
SELECT widthBucket(10.15, -8.6, 23, 18);
```
Result:
-``` text
+```result
┌─widthBucket(10.15, -8.6, 23, 18)─┐
│ 11 │
└──────────────────────────────────┘
-```
\ No newline at end of file
+```
diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md
index 132a126edee..41773dc1a0d 100644
--- a/docs/en/sql-reference/functions/nlp-functions.md
+++ b/docs/en/sql-reference/functions/nlp-functions.md
@@ -1,8 +1,7 @@
---
slug: /en/sql-reference/functions/nlp-functions
-sidebar_position: 67
-sidebar_label: NLP
-title: "[experimental] Natural Language Processing functions"
+sidebar_position: 130
+sidebar_label: NLP (experimental)
---
:::note
@@ -21,8 +20,8 @@ stem('language', word)
**Arguments**
-- `language` — Language which rules will be applied. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).
-- `word` — word that needs to be stemmed. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).
+- `language` — Language whose rules will be applied. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).
+- `word` — Word that needs to be stemmed. Must be in lowercase. [String](../../sql-reference/data-types/string.md#string).
**Examples**
@@ -52,8 +51,8 @@ lemmatize('language', word)
**Arguments**
-- `language` — Language which rules will be applied. [String](../../sql-reference/data-types/string.md#string).
-- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../../sql-reference/data-types/string.md#string).
+- `language` — Language whose rules will be applied. [String](../../sql-reference/data-types/string.md#string).
+- `word` — Word that needs to be lemmatized. Must be lowercase. [String](../../sql-reference/data-types/string.md#string).
**Examples**
@@ -97,8 +96,8 @@ synonyms('extension_name', word)
**Arguments**
-- `extension_name` — Name of the extension in which search will be performed. [String](../../sql-reference/data-types/string.md#string).
-- `word` — Word that will be searched in extension. [String](../../sql-reference/data-types/string.md#string).
+- `extension_name` — Name of the extension in which the search will be performed. [String](../../sql-reference/data-types/string.md#string).
+- `word` — Word that will be searched in the extension. [String](../../sql-reference/data-types/string.md#string).
**Examples**
@@ -146,7 +145,7 @@ detectLanguage('text_to_be_analyzed')
**Arguments**
-- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
+- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
@@ -184,7 +183,7 @@ detectLanguageMixed('text_to_be_analyzed')
**Arguments**
-- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
+- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
@@ -220,7 +219,7 @@ detectLanguageUnknown('text_to_be_analyzed')
**Arguments**
-- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
+- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
@@ -260,7 +259,7 @@ detectCharset('text_to_be_analyzed')
**Arguments**
-- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
+- `text_to_be_analyzed` — A collection (or sentences) of strings to analyze. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 2e44fa5e9f6..600ec576339 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -1,6 +1,6 @@
---
slug: /en/sql-reference/functions/other-functions
-sidebar_position: 67
+sidebar_position: 140
sidebar_label: Other
---
@@ -23,11 +23,11 @@ getMacro(name);
**Arguments**
-- `name` — Name to retrieve from the `macros` section. [String](../../sql-reference/data-types/string.md#string).
+- `name` — Name to retrieve from the `macros` section. [String](../../sql-reference/data-types/string.md#string).
**Returned value**
-- Value of the specified macro.
+- Value of the specified macro.
Type: [String](../../sql-reference/data-types/string.md).
@@ -82,7 +82,7 @@ This function is case-insensitive.
**Returned value**
-- String with the fully qualified domain name.
+- String with the fully qualified domain name.
Type: `String`.
@@ -112,17 +112,17 @@ basename( expr )
**Arguments**
-- `expr` — Expression resulting in a [String](../../sql-reference/data-types/string.md) type value. All the backslashes must be escaped in the resulting value.
+- `expr` — Expression resulting in a [String](../../sql-reference/data-types/string.md) type value. All the backslashes must be escaped in the resulting value.
**Returned Value**
A string that contains:
-- The trailing part of a string after the last slash or backslash.
+- The trailing part of a string after the last slash or backslash.
If the input string contains a path ending with slash or backslash, for example, `/` or `c:\`, the function returns an empty string.
-- The original string if there are no slashes or backslashes.
+- The original string if there are no slashes or backslashes.
**Example**
@@ -196,11 +196,11 @@ byteSize(argument [, ...])
**Arguments**
-- `argument` — Value.
+- `argument` — Value.
**Returned value**
-- Estimation of byte size of the arguments in memory.
+- Estimation of byte size of the arguments in memory.
Type: [UInt64](../../sql-reference/data-types/int-uint.md).
@@ -316,8 +316,8 @@ Alias: `user()`, `USER()`.
**Returned values**
-- Login of current user.
-- Login of user that initiated query in case of disributed query.
+- Login of current user.
+- Login of the user that initiated the query, in case of a distributed query.
Type: `String`.
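For example (illustrative; the returned login depends on how the connection was made):

```sql
SELECT currentUser();
-- 'default' when connected as the default user
```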
@@ -353,12 +353,12 @@ isConstant(x) **Arguments** -- `x` — Expression to check. +- `x` — Expression to check. **Returned values** -- `1` — `x` is constant. -- `0` — `x` is non-constant. +- `1` — `x` is constant. +- `0` — `x` is non-constant. Type: [UInt8](../../sql-reference/data-types/int-uint.md). @@ -424,13 +424,13 @@ Checks whether floating point value is finite. **Arguments** -- `x` — Value to be checked for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). -- `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). +- `x` — Value to be checked for infinity. Type: [Float\*](../../sql-reference/data-types/float.md). +- `y` — Fallback value. Type: [Float\*](../../sql-reference/data-types/float.md). **Returned value** -- `x` if `x` is finite. -- `y` if `x` is not finite. +- `x` if `x` is finite. +- `y` if `x` is not finite. **Example** @@ -464,9 +464,9 @@ Allows building a unicode-art diagram. **Arguments** -- `x` — Size to display. -- `min, max` — Integer constants. The value must fit in `Int64`. -- `width` — Constant, positive integer, can be fractional. +- `x` — Size to display. +- `min, max` — Integer constants. The value must fit in `Int64`. +- `width` — Constant, positive integer, can be fractional. The band is drawn with accuracy to one eighth of a symbol. @@ -670,8 +670,8 @@ formatReadableTimeDelta(column[, maximum_unit]) **Arguments** -- `column` — A column with numeric time delta. -- `maximum_unit` — Optional. Maximum unit to show. Acceptable values seconds, minutes, hours, days, months, years. +- `column` — A column with a numeric time delta. +- `maximum_unit` — Optional. Maximum unit to show. Acceptable values: seconds, minutes, hours, days, months, years. Example: @@ -715,12 +715,12 @@ parseTimeDelta(timestr) **Arguments** -- `timestr` — A sequence of numbers followed by something resembling a time unit. +- `timestr` — A sequence of numbers followed by something resembling a time unit. **Returned value** -- A floating-point number with the number of seconds. +- A floating-point number with the number of seconds. **Example** @@ -801,14 +801,14 @@ To prevent that you can make a subquery with [ORDER BY](../../sql-reference/stat **Arguments** -- `column` — A column name or scalar expression. -- `offset` — The number of rows forwards or backwards from the current row of `column`. [Int64](../../sql-reference/data-types/int-uint.md). -- `default_value` — Optional. The value to be returned if offset goes beyond the scope of the block. Type of data blocks affected. +- `column` — A column name or scalar expression. +- `offset` — The number of rows forwards or backwards from the current row of `column`. [Int64](../../sql-reference/data-types/int-uint.md). +- `default_value` — Optional. The value to be returned if offset goes beyond the scope of the block. Type of data blocks affected. **Returned values** -- Value for `column` in `offset` distance from current row if `offset` value is not outside block bounds. -- Default value for `column` if `offset` value is outside block bounds. If `default_value` is given, then it will be used. +- Value for `column` in `offset` distance from current row if `offset` value is not outside block bounds. +- Default value for `column` if `offset` value is outside block bounds. If `default_value` is given, then it will be used. Type: type of data blocks affected or default value type. @@ -998,12 +998,12 @@ runningConcurrency(start, end) **Arguments** -- `start` — A column with the start time of events.
[Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). -- `end` — A column with the end time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `start` — A column with the start time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `end` — A column with the end time of events. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), or [DateTime64](../../sql-reference/data-types/datetime64.md). **Returned values** -- The number of concurrent events at each event start time. +- The number of concurrent events at each event start time. Type: [UInt32](../../sql-reference/data-types/int-uint.md) @@ -1059,12 +1059,12 @@ getSizeOfEnumType(value) **Arguments:** -- `value` — Value of type `Enum`. +- `value` — Value of type `Enum`. **Returned values** -- The number of fields with `Enum` input values. -- An exception is thrown if the type is not `Enum`. +- The number of fields with `Enum` input values. +- An exception is thrown if the type is not `Enum`. **Example** @@ -1088,11 +1088,11 @@ blockSerializedSize(value[, value[, ...]]) **Arguments** -- `value` — Any value. +- `value` — Any value. **Returned values** -- The number of bytes that will be written to disk for block of values (without compression). +- The number of bytes that will be written to disk for a block of values (without compression). **Example** @@ -1120,11 +1120,11 @@ toColumnTypeName(value) **Arguments:** -- `value` — Any type of value. +- `value` — Any type of value. **Returned values** -- A string with the name of the class that is used for representing the `value` data type in RAM. +- A string with the name of the class that is used for representing the `value` data type in RAM. **Example of the difference between `toTypeName` and `toColumnTypeName`** @@ -1160,11 +1160,11 @@ dumpColumnStructure(value) **Arguments:** -- `value` — Any type of value. +- `value` — Any type of value. **Returned values** -- A string describing the structure that is used for representing the `value` data type in RAM. +- A string describing the structure that is used for representing the `value` data type in RAM. **Example** @@ -1190,13 +1190,13 @@ defaultValueOfArgumentType(expression) **Arguments:** -- `expression` — Arbitrary type of value or an expression that results in a value of an arbitrary type. +- `expression` — Arbitrary type of value or an expression that results in a value of an arbitrary type. **Returned values** -- `0` for numbers. -- Empty string for strings. -- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md). +- `0` for numbers. +- Empty string for strings. +- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md). **Example** @@ -1232,13 +1232,13 @@ defaultValueOfTypeName(type) **Arguments:** -- `type` — A string representing a type name. +- `type` — A string representing a type name. **Returned values** -- `0` for numbers. -- Empty string for strings. -- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md). +- `0` for numbers. +- Empty string for strings. +- `ᴺᵁᴸᴸ` for [Nullable](../../sql-reference/data-types/nullable.md).
**Example** @@ -1377,8 +1377,8 @@ SELECT replicate(x, arr); **Arguments:** -- `arr` — Original array. ClickHouse creates a new array of the same length as the original and fills it with the value `x`. -- `x` — The value that the resulting array will be filled with. +- `arr` — Original array. ClickHouse creates a new array of the same length as the original and fills it with the value `x`. +- `x` — The value that the resulting array will be filled with. **Returned value** @@ -1414,7 +1414,7 @@ filesystemAvailable() **Returned value** -- The amount of remaining space available in bytes. +- The amount of remaining space available in bytes. Type: [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1446,7 +1446,7 @@ filesystemFree() **Returned value** -- Amount of free space in bytes. +- Amount of free space in bytes. Type: [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1478,7 +1478,7 @@ filesystemCapacity() **Returned value** -- Capacity information of the filesystem in bytes. +- Capacity information of the filesystem in bytes. Type: [UInt64](../../sql-reference/data-types/int-uint.md). @@ -1510,8 +1510,8 @@ initializeAggregation (aggregate_function, arg1, arg2, ..., argN) **Arguments** -- `aggregate_function` — Name of the aggregation function to initialize. [String](../../sql-reference/data-types/string.md). -- `arg` — Arguments of aggregate function. +- `aggregate_function` — Name of the aggregation function to initialize. [String](../../sql-reference/data-types/string.md). +- `arg` — Arguments of the aggregate function. **Returned value(s)** @@ -1568,7 +1568,7 @@ INSERT INTO metrics VALUES (0, initializeAggregation('sumState', toUInt64(42))) ``` **See Also** -- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce) +- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce) ## finalizeAggregation @@ -1582,11 +1582,11 @@ finalizeAggregation(state) **Arguments** -- `state` — State of aggregation. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). +- `state` — State of aggregation. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). **Returned value(s)** -- Value/values that was aggregated. +- Value/values that were aggregated. Type: Value of any type that was aggregated. @@ -1667,8 +1667,8 @@ Result: ``` **See Also** -- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce) -- [initializeAggregation](#initializeaggregation) +- [arrayReduce](../../sql-reference/functions/array-functions.md#arrayreduce) +- [initializeAggregation](#initializeaggregation) ## runningAccumulate @@ -1686,12 +1686,12 @@ runningAccumulate(agg_state[, grouping]); **Arguments** -- `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). -- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined. +- `agg_state` — State of the aggregate function. [AggregateFunction](../../sql-reference/data-types/aggregatefunction.md#data-type-aggregatefunction). +- `grouping` — Grouping key. Optional. The state of the function is reset if the `grouping` value is changed. It can be any of the [supported data types](../../sql-reference/data-types/index.md) for which the equality operator is defined.
**Returned value** -- Each resulting row contains a result of the aggregate function, accumulated for all the input rows from 0 to the current position. `runningAccumulate` resets states for each new data block or when the `grouping` value changes. +- Each resulting row contains a result of the aggregate function, accumulated for all the input rows from 0 to the current position. `runningAccumulate` resets states for each new data block or when the `grouping` value changes. Type depends on the aggregate function used. @@ -1792,9 +1792,9 @@ joinGet(join_storage_table_name, `value_column`, join_keys) **Arguments** -- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicates where search is performed. The identifier is searched in the default database (see parameter `default_database` in the config file). To override the default database, use the `USE db_name` or specify the database and the table through the separator `db_name.db_table`, see the example. -- `value_column` — name of the column of the table that contains required data. -- `join_keys` — list of keys. +- `join_storage_table_name` — an [identifier](../../sql-reference/syntax.md#syntax-identifiers) indicating where the search is performed. The identifier is searched in the default database (see parameter `default_database` in the config file). To override the default database, use the `USE db_name` statement or specify the database and the table through the separator `db_name.db_table`, see the example. +- `value_column` — Name of the column of the table that contains the required data. +- `join_keys` — List of keys. **Returned value** @@ -1939,13 +1939,13 @@ randomPrintableASCII(length) **Arguments** -- `length` — Resulting string length. Positive integer. +- `length` — Resulting string length. Positive integer. If you pass `length < 0`, behavior of the function is undefined. **Returned value** -- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. +- String with a random set of [ASCII](https://en.wikipedia.org/wiki/ASCII#Printable_characters) printable characters. Type: [String](../../sql-reference/data-types/string.md) @@ -1975,11 +1975,11 @@ randomString(length) **Arguments** -- `length` — String length. Positive integer. +- `length` — String length. Positive integer. **Returned value** -- String filled with random bytes. +- String filled with random bytes. Type: [String](../../sql-reference/data-types/string.md). @@ -2007,8 +2007,8 @@ len: 30 **See Also** -- [generateRandom](../../sql-reference/table-functions/generate.md#generaterandom) -- [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii) +- [generateRandom](../../sql-reference/table-functions/generate.md#generaterandom) +- [randomPrintableASCII](../../sql-reference/functions/other-functions.md#randomascii) ## randomFixedString @@ -2023,11 +2023,11 @@ randomFixedString(length); **Arguments** -- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). +- `length` — String length in bytes. [UInt64](../../sql-reference/data-types/int-uint.md). **Returned value(s)** -- String filled with random bytes. +- String filled with random bytes. Type: [FixedString](../../sql-reference/data-types/fixedstring.md). @@ -2060,11 +2060,11 @@ randomStringUTF8(length); **Arguments** -- `length` — Required length of the resulting string in code points. [UInt64](../../sql-reference/data-types/int-uint.md).
+- `length` — Required length of the resulting string in code points. [UInt64](../../sql-reference/data-types/int-uint.md). **Returned value(s)** -- UTF-8 random string. +- UTF-8 random string. Type: [String](../../sql-reference/data-types/string.md). @@ -2097,11 +2097,11 @@ getSetting('custom_setting'); **Parameter** -- `custom_setting` — The setting name. [String](../../sql-reference/data-types/string.md). +- `custom_setting` — The setting name. [String](../../sql-reference/data-types/string.md). **Returned value** -- The setting current value. +- The current value of the setting. **Example** @@ -2118,7 +2118,7 @@ SELECT getSetting('custom_a'); **See Also** -- [Custom Settings](../../operations/settings/index.md#custom_settings) +- [Custom Settings](../../operations/settings/index.md#custom_settings) ## isDecimalOverflow @@ -2132,13 +2132,13 @@ isDecimalOverflow(d, [p]) **Arguments** -- `d` — value. [Decimal](../../sql-reference/data-types/decimal.md). -- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. Using of this paratemer could be helpful for data extraction to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). +- `d` — Value. [Decimal](../../sql-reference/data-types/decimal.md). +- `p` — Precision. Optional. If omitted, the initial precision of the first argument is used. Using this parameter can be helpful for data extraction to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). **Returned values** -- `1` — Decimal value has more digits then it's precision allow, -- `0` — Decimal value satisfies the specified precision. +- `1` — Decimal value has more digits than its precision allows, +- `0` — Decimal value satisfies the specified precision. **Example** @@ -2169,7 +2169,7 @@ countDigits(x) **Arguments** -- `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value. +- `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value. **Returned value** @@ -2201,7 +2201,7 @@ Result: **Returned value** -- Variable name for the error code. +- Variable name for the error code. Type: [LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md). @@ -2230,11 +2230,11 @@ tcpPort() **Arguments** -- None. +- None. **Returned value** -- The TCP port number. +- The TCP port number. Type: [UInt16](../../sql-reference/data-types/int-uint.md). @@ -2256,7 +2256,7 @@ Result: **See Also** -- [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) +- [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) ## currentProfiles @@ -2272,7 +2272,7 @@ currentProfiles() **Returned value** -- List of the current user settings profiles. +- List of the current user settings profiles. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -2288,7 +2288,7 @@ enabledProfiles() **Returned value** -- List of the enabled settings profiles. +- List of the enabled settings profiles. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -2304,7 +2304,7 @@ defaultProfiles() **Returned value** -- List of the default settings profiles. +- List of the default settings profiles.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -2320,7 +2320,7 @@ currentRoles() **Returned value** -- List of the current roles for the current user. +- List of the current roles for the current user. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -2336,7 +2336,7 @@ enabledRoles() **Returned value** -- List of the enabled roles for the current user. +- List of the enabled roles for the current user. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -2352,7 +2352,7 @@ defaultRoles() **Returned value** -- List of the default roles for the current user. +- List of the default roles for the current user. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -2368,22 +2368,22 @@ getServerPort(port_name) **Arguments** -- `port_name` — The name of the server port. [String](../../sql-reference/data-types/string.md#string). Possible values: +- `port_name` — The name of the server port. [String](../../sql-reference/data-types/string.md#string). Possible values: - - 'tcp_port' - - 'tcp_port_secure' - - 'http_port' - - 'https_port' - - 'interserver_http_port' - - 'interserver_https_port' - - 'mysql_port' - - 'postgresql_port' - - 'grpc_port' - - 'prometheus.port' + - 'tcp_port' + - 'tcp_port_secure' + - 'http_port' + - 'https_port' + - 'interserver_http_port' + - 'interserver_https_port' + - 'mysql_port' + - 'postgresql_port' + - 'grpc_port' + - 'prometheus.port' **Returned value** -- The number of the server port. +- The number of the server port. Type: [UInt16](../../sql-reference/data-types/int-uint.md). @@ -2417,7 +2417,7 @@ queryID() **Returned value** -- The ID of the current query. +- The ID of the current query. Type: [String](../../sql-reference/data-types/string.md) @@ -2453,7 +2453,7 @@ initialQueryID() **Returned value** -- The ID of the initial current query. +- The ID of the initial current query. Type: [String](../../sql-reference/data-types/string.md) @@ -2488,7 +2488,7 @@ shardNum() **Returned value** -- Shard index or constant `0`. +- Shard index or constant `0`. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -2515,7 +2515,7 @@ Result: **See Also** -- [Distributed Table Engine](../../engines/table-engines/special/distributed.md) +- [Distributed Table Engine](../../engines/table-engines/special/distributed.md) ## shardCount @@ -2530,7 +2530,7 @@ shardCount() **Returned value** -- Total number of shards or `0`. +- Total number of shards or `0`. Type: [UInt32](../../sql-reference/data-types/int-uint.md). @@ -2550,11 +2550,11 @@ getOSKernelVersion() **Arguments** -- None. +- None. **Returned value** -- The current OS kernel version. +- The current OS kernel version. Type: [String](../../sql-reference/data-types/string.md). @@ -2586,11 +2586,11 @@ zookeeperSessionUptime() **Arguments** -- None. +- None. **Returned value** -- Uptime of the current ZooKeeper session in seconds. +- Uptime of the current ZooKeeper session in seconds. Type: [UInt32](../../sql-reference/data-types/int-uint.md). 
diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index eb6866d28ea..21c8ffa6e8c 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -1,58 +1,44 @@ --- slug: /en/sql-reference/functions/random-functions -sidebar_position: 51 -sidebar_label: Pseudo-Random Numbers +sidebar_position: 145 +sidebar_label: Random Numbers --- -# Functions for Generating Pseudo-Random Numbers +# Functions for Generating Random Numbers -All the functions accept zero arguments or one argument. If an argument is passed, it can be any type, and its value is not used for anything. The only purpose of this argument is to prevent common subexpression elimination, so that two different instances of the same function return different columns with different random numbers. +All functions in this section accept zero or one argument. The only use of the argument (if provided) is to prevent [common subexpression +elimination](../../sql-reference/functions/index.md#common-subexpression-elimination), so that two different executions of the same random +function in a query return different random values. -:::note -Non-cryptographic generators of pseudo-random numbers are used. +Related content +- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) + +:::note +The random numbers are generated by non-cryptographic algorithms. ::: ## rand, rand32 -Returns a pseudo-random UInt32 number, evenly distributed among all UInt32-type numbers. +Returns a random UInt32 number, evenly distributed across the range of all possible UInt32 numbers. Uses a linear congruential generator. ## rand64 -Returns a pseudo-random UInt64 number, evenly distributed among all UInt64-type numbers. +Returns a random UInt64 number, evenly distributed across the range of all possible UInt64 numbers. Uses a linear congruential generator. ## randCanonical -The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). -Non-deterministic. Return type is Float64. +Returns a Float64 value, evenly distributed in [0, 1). ## randConstant -Produces a constant column with a random value. - -**Syntax** - -``` sql -randConstant([x]) -``` - -**Arguments** - -- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. - -**Returned value** - -- Pseudo-random number. - -Type: [UInt32](../../sql-reference/data-types/int-uint.md). +Like `rand` but produces a constant column with a random value.
**Example** -Query: - ``` sql SELECT rand(), rand(1), rand(number), randConstant(), randConstant(1), randConstant(number) FROM numbers(3) @@ -60,7 +46,7 @@ FROM numbers(3) Result: -``` text +``` result ┌─────rand()─┬────rand(1)─┬─rand(number)─┬─randConstant()─┬─randConstant(1)─┬─randConstant(number)─┐ │ 3047369878 │ 4132449925 │ 4044508545 │ 2740811946 │ 4229401477 │ 1924032898 │ │ 2938880146 │ 1267722397 │ 4154983056 │ 2740811946 │ 4229401477 │ 1924032898 │ @@ -68,17 +54,11 @@ Result: └────────────┴────────────┴──────────────┴────────────────┴─────────────────┴──────────────────────┘ ``` -# Functions for Generating Random Numbers based on Distributions - -:::note -These functions are available starting from 22.10. -::: - - +# Functions for Generating Random Numbers based on a Distribution ## randUniform -Return random number based on [continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution) in a specified range from `min` to `max`. +Returns a Float64 drawn uniformly from the interval between `min` and `max` ([continuous uniform distribution](https://en.wikipedia.org/wiki/Continuous_uniform_distribution)). **Syntax** @@ -93,21 +73,19 @@ randUniform(min, max) **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -Query: - ``` sql SELECT randUniform(5.5, 10) FROM numbers(5) ``` Result: -``` text +``` result ┌─randUniform(5.5, 10)─┐ │ 8.094978491443102 │ │ 7.3181248914450885 │ @@ -117,40 +95,36 @@ Result: └──────────────────────┘ ``` - - ## randNormal -Return random number based on [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution). +Returns a Float64 drawn from a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution). **Syntax** ``` sql -randNormal(meam, variance) +randNormal(mean, variance) ``` **Arguments** -- `meam` - `Float64` mean value of distribution, +- `mean` - `Float64` - mean value of distribution, - `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance). **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -Query: - ``` sql SELECT randNormal(10, 2) FROM numbers(5) ``` Result: -``` text +``` result ┌──randNormal(10, 2)─┐ │ 13.389228911709653 │ │ 8.622949707401295 │ @@ -160,40 +134,36 @@ Result: └────────────────────┘ ``` - - ## randLogNormal -Return random number based on [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution). +Returns a Float64 drawn from a [log-normal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution). **Syntax** ``` sql -randLogNormal(meam, variance) +randLogNormal(mean, variance) ``` **Arguments** -- `meam` - `Float64` mean value of distribution, +- `mean` - `Float64` - mean value of distribution, - `variance` - `Float64` - [variance](https://en.wikipedia.org/wiki/Variance). **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -Query: - ``` sql SELECT randLogNormal(100, 5) FROM numbers(5) ``` Result: -``` text +``` result ┌─randLogNormal(100, 5)─┐ │ 1.295699673937363e48 │ │ 9.719869109186684e39 │ @@ -203,11 +173,9 @@ Result: └───────────────────────┘ ``` - - ## randBinomial -Return random number based on [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution). 
+Returns a UInt64 drawn from a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution). **Syntax** @@ -217,26 +185,24 @@ randBinomial(experiments, probability) **Arguments** -- `experiments` - `UInt64` number of experiments, +- `experiments` - `UInt64` - number of experiments, - `probability` - `Float64` - probability of success in each experiment (values in `0...1` range only). **Returned value** -- Pseudo-random number. +- Random number. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT randBinomial(100, .75) FROM numbers(5) ``` Result: -``` text +``` result ┌─randBinomial(100, 0.75)─┐ │                      74 │ │                      78 │ @@ -246,11 +212,9 @@ Result: └─────────────────────────┘ ``` - - ## randNegativeBinomial -Return random number based on [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution). +Returns a UInt64 drawn from a [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution). **Syntax** @@ -260,26 +224,24 @@ randNegativeBinomial(experiments, probability) **Arguments** -- `experiments` - `UInt64` number of experiments, +- `experiments` - `UInt64` - number of experiments, - `probability` - `Float64` - probability of failure in each experiment (values in `0...1` range only). **Returned value** -- Pseudo-random number. +- Random number. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT randNegativeBinomial(100, .75) FROM numbers(5) ``` Result: -``` text +``` result ┌─randNegativeBinomial(100, 0.75)─┐ │                              33 │ │                              32 │ @@ -289,11 +251,9 @@ Result: └─────────────────────────────────┘ ``` - - ## randPoisson -Return random number based on [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution). +Returns a UInt64 drawn from a [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution). **Syntax** @@ -303,25 +263,23 @@ randPoisson(n) **Arguments** -- `n` - `UInt64` mean number of occurrences. +- `n` - `UInt64` - mean number of occurrences. **Returned value** -- Pseudo-random number. +- Random number. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT randPoisson(10) FROM numbers(5) ``` Result: -``` text +``` result ┌─randPoisson(10)─┐ │               8 │ │               8 │ @@ -331,11 +289,9 @@ Result: └─────────────────┘ ``` - - ## randBernoulli -Return random number based on [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution). +Returns a UInt64 drawn from a [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution). **Syntax** @@ -349,21 +305,19 @@ randBernoulli(probability) **Returned value** -- Pseudo-random number. +- Random number. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). **Example** -Query: - ``` sql SELECT randBernoulli(.75) FROM numbers(5) ``` Result: -``` text +``` result ┌─randBernoulli(0.75)─┐ │                   1 │ │                   1 │ @@ -373,11 +327,9 @@ Result: └─────────────────────┘ ``` - - ## randExponential -Return random number based on [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution). +Returns a Float64 drawn from an [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution). **Syntax** @@ -387,25 +339,23 @@ randExponential(lambda) **Arguments** -- `lambda` - `Float64` lambda value. +- `lambda` - `Float64` - lambda value. **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md).
**Example** -Query: - ``` sql SELECT randExponential(1/10) FROM numbers(5) ``` Result: -``` text +``` result ┌─randExponential(divide(1, 10))─┐ │              44.71628934340778 │ │              4.211013337903262 │ @@ -415,11 +365,9 @@ Result: └────────────────────────────────┘ ``` - - ## randChiSquared -Return random number based on [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables. +Returns a Float64 drawn from a [Chi-square distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution) - a distribution of a sum of the squares of k independent standard normal random variables. **Syntax** @@ -429,25 +377,23 @@ randChiSquared(degree_of_freedom) **Arguments** -- `degree_of_freedom` - `Float64` degree of freedom. +- `degree_of_freedom` - `Float64` - degree of freedom. **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -Query: - ``` sql SELECT randChiSquared(10) FROM numbers(5) ``` Result: -``` text +``` result ┌─randChiSquared(10)─┐ │ 10.015463656521543 │ │  9.621799919882768 │ @@ -457,11 +403,9 @@ Result: └────────────────────┘ ``` - - ## randStudentT -Return random number based on [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution). +Returns a Float64 drawn from a [Student's t-distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution). **Syntax** @@ -471,25 +415,23 @@ randStudentT(degree_of_freedom) **Arguments** -- `degree_of_freedom` - `Float64` degree of freedom. +- `degree_of_freedom` - `Float64` - degree of freedom. **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -Query: - ``` sql SELECT randStudentT(10) FROM numbers(5) ``` Result: -``` text +``` result ┌─────randStudentT(10)─┐ │   1.2217309938538725 │ │   1.7941971681200541 │ @@ -499,11 +441,9 @@ Result: └──────────────────────┘ ``` - - ## randFisherF -Return random number based on [F-distribution](https://en.wikipedia.org/wiki/F-distribution). +Returns a Float64 drawn from an [F-distribution](https://en.wikipedia.org/wiki/F-distribution). **Syntax** @@ -513,26 +453,24 @@ randFisherF(d1, d2) **Arguments** -- `d1` - `Float64` d1 degree of freedom in `X = (S1 / d1) / (S2 / d2)`, -- `d2` - `Float64` d2 degree of freedom in `X = (S1 / d1) / (S2 / d2)`, +- `d1` - `Float64` - d1 degree of freedom in `X = (S1 / d1) / (S2 / d2)`, +- `d2` - `Float64` - d2 degree of freedom in `X = (S1 / d1) / (S2 / d2)`, **Returned value** -- Pseudo-random number. +- Random number. Type: [Float64](/docs/en/sql-reference/data-types/float.md). **Example** -Query: - ``` sql SELECT randFisherF(10, 3) FROM numbers(5) ``` Result: -``` text +``` result ┌──randFisherF(10, 3)─┐ │   7.286287504216609 │ │ 0.26590779413050386 │ @@ -542,35 +480,61 @@ Result: └─────────────────────┘ ``` - - - -# Random Functions for Working with Strings +# Functions for Generating Random Strings ## randomString +Returns a random String of the specified `length`. Not all characters may be printable. + +**Syntax** + +```sql +randomString(length) +``` + ## randomFixedString +Like `randomString` but returns a FixedString. + ## randomPrintableASCII +Returns a random String of the specified `length`. All characters are printable. + +**Syntax** + +```sql +randomPrintableASCII(length) +``` + ## randomStringUTF8 +Returns a random String containing `length` many UTF-8 code points.
Not all characters may be printable. + +**Syntax** + +```sql +randomStringUTF8(length) +``` + ## fuzzBits -**Syntax** -``` sql -fuzzBits([s], [prob]) -``` +Inverts the bits of String or FixedString `s`, each with probability `prob`. +**Syntax** + +``` sql +fuzzBits(s, prob) +``` **Arguments** - `s` - `String` or `FixedString` - `prob` - constant `Float32/64` **Returned value** -Fuzzed string with same as s type. + +Fuzzed string with the same type as `s`. **Example** @@ -581,13 +545,10 @@ FROM numbers(3) Result: -``` text +``` result ┌─fuzzBits(materialize('abacaba'), 0.1)─┐ │ abaaaja                               │ │ a*cjab+                               │ │ aeca2A                                │ └───────────────────────────────────────┘ ``` - -## Related content -- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) diff --git a/docs/en/sql-reference/functions/rounding-functions.md b/docs/en/sql-reference/functions/rounding-functions.md index 01ee720cfd3..e9a0ed72466 100644 --- a/docs/en/sql-reference/functions/rounding-functions.md +++ b/docs/en/sql-reference/functions/rounding-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/rounding-functions -sidebar_position: 45 +sidebar_position: 155 sidebar_label: Rounding --- @@ -38,11 +38,11 @@ round(expression [, decimal_places]) **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). -- `decimal-places` — An integer value. - - If `decimal-places > 0` then the function rounds the value to the right of the decimal point. - - If `decimal-places < 0` then the function rounds the value to the left of the decimal point. - - If `decimal-places = 0` then the function rounds the value to integer. In this case the argument can be omitted. +- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `decimal-places` — An integer value. + - If `decimal-places > 0` then the function rounds the value to the right of the decimal point. + - If `decimal-places < 0` then the function rounds the value to the left of the decimal point. + - If `decimal-places = 0` then the function rounds the value to integer. In this case the argument can be omitted. **Returned value:** @@ -101,27 +101,27 @@ round(3.65, 1) = 3.6 **See Also** -- [roundBankers](#roundbankers) +- [roundBankers](#roundbankers) ## roundBankers Rounds a number to a specified decimal position. -- If the rounding number is halfway between two numbers, the function uses banker’s rounding. +- If the rounding number is halfway between two numbers, the function uses banker’s rounding. Banker's rounding is a method of rounding fractional numbers. When the rounding number is halfway between two numbers, it's rounded to the nearest even digit at the specified decimal position. For example: 3.5 rounds up to 4, 2.5 rounds down to 2. It's the default rounding method for floating point numbers defined in [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest). The [round](#rounding_functions-round) function performs the same rounding for floating point numbers. The `roundBankers` function also rounds integers the same way, for example, `roundBankers(45, -1) = 40`. -- In other cases, the function rounds numbers to the nearest integer.
+- In other cases, the function rounds numbers to the nearest integer. Using banker’s rounding, you can reduce the effect that rounding numbers has on the results of summing or subtracting these numbers. For example, sum numbers 1.5, 2.5, 3.5, 4.5 with different rounding: -- No rounding: 1.5 + 2.5 + 3.5 + 4.5 = 12. -- Banker’s rounding: 2 + 2 + 4 + 4 = 12. -- Rounding to the nearest integer: 2 + 3 + 4 + 5 = 14. +- No rounding: 1.5 + 2.5 + 3.5 + 4.5 = 12. +- Banker’s rounding: 2 + 2 + 4 + 4 = 12. +- Rounding to the nearest integer: 2 + 3 + 4 + 5 = 14. **Syntax** @@ -131,11 +131,11 @@ roundBankers(expression [, decimal_places]) **Arguments** -- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). -- `decimal-places` — Decimal places. An integer number. - - `decimal-places > 0` — The function rounds the number to the given position right of the decimal point. Example: `roundBankers(3.55, 1) = 3.6`. - - `decimal-places < 0` — The function rounds the number to the given position left of the decimal point. Example: `roundBankers(24.55, -1) = 20`. - - `decimal-places = 0` — The function rounds the number to an integer. In this case the argument can be omitted. Example: `roundBankers(2.5) = 2`. +- `expression` — A number to be rounded. Can be any [expression](../../sql-reference/syntax.md#syntax-expressions) returning the numeric [data type](../../sql-reference/data-types/index.md#data_types). +- `decimal-places` — Decimal places. An integer number. + - `decimal-places > 0` — The function rounds the number to the given position right of the decimal point. Example: `roundBankers(3.55, 1) = 3.6`. + - `decimal-places < 0` — The function rounds the number to the given position left of the decimal point. Example: `roundBankers(24.55, -1) = 20`. + - `decimal-places = 0` — The function rounds the number to an integer. In this case the argument can be omitted. Example: `roundBankers(2.5) = 2`. **Returned value** @@ -182,7 +182,7 @@ roundBankers(10.755, 2) = 10.76 **See Also** -- [round](#rounding_functions-round) +- [round](#rounding_functions-round) ## roundToExp2(num) @@ -194,7 +194,14 @@ Accepts a number. If the number is less than one, it returns 0. Otherwise, it ro ## roundAge(num) -Accepts a number. If the number is less than 18, it returns 0. Otherwise, it rounds the number down to a number from the set: 18, 25, 35, 45, 55. +Accepts a number. If the number is +- smaller than 1, it returns 0, +- between 1 and 17, it returns 17, +- between 18 and 24, it returns 18, +- between 25 and 34, it returns 25, +- between 35 and 44, it returns 35, +- between 45 and 54, it returns 45, +- equal to or larger than 55, it returns 55.
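A short worked example makes the new `roundAge` buckets concrete; the added query and result below are an illustrative sketch that assumes `roundAge` follows exactly the rules listed above, and are not taken from the original file: + +**Example** + +``` sql +SELECT arrayJoin([0, 5, 20, 30, 50, 60]) AS age, roundAge(age); +``` + +Result: + +``` result +┌─age─┬─roundAge(age)─┐ +│   0 │             0 │ +│   5 │            17 │ +│  20 │            18 │ +│  30 │            25 │ +│  50 │            45 │ +│  60 │            55 │ +└─────┴───────────────┘ +```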
## roundDown(num, arr) diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md index 6015bb79b87..7336e53fc24 100644 --- a/docs/en/sql-reference/functions/splitting-merging-functions.md +++ b/docs/en/sql-reference/functions/splitting-merging-functions.md @@ -1,14 +1,14 @@ --- slug: /en/sql-reference/functions/splitting-merging-functions -sidebar_position: 47 -sidebar_label: Splitting and Merging Strings and Arrays +sidebar_position: 165 +sidebar_label: Splitting Strings --- -# Functions for Splitting and Merging Strings and Arrays +# Functions for Splitting Strings -## splitByChar(separator, s[, max_substrings]) +## splitByChar -Splits a string into substrings separated by a specified character. It uses a constant string `separator` which consists of exactly one character. +Splits a string into substrings separated by a specified character. Uses a constant string `separator` which consists of exactly one character. Returns an array of selected substrings. Empty substrings may be selected if the separator occurs at the beginning or end of the string, or if there are multiple consecutive separators. **Syntax** @@ -19,17 +19,17 @@ splitByChar(separator, s[, max_substrings])) **Arguments** -- `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). -- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. +- `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** Returns an array of selected substrings. Empty substrings may be selected when: -- A separator occurs at the beginning or end of the string; -- There are multiple consecutive separators; -- The original string `s` is empty. +- A separator occurs at the beginning or end of the string; +- There are multiple consecutive separators; +- The original string `s` is empty. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). @@ -39,13 +39,15 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere SELECT splitByChar(',', '1,2,3,abcde'); ``` +Result: + ``` text ┌─splitByChar(',', '1,2,3,abcde')─┐ │ ['1','2','3','abcde'] │ └─────────────────────────────────┘ ``` -## splitByString(separator, s[, max_substrings]) +## splitByString Splits a string into substrings separated by a string. It uses a constant string `separator` of multiple characters as the separator. If the string `separator` is empty, it will split the string `s` into an array of single characters. @@ -57,9 +59,9 @@ splitByString(separator, s[, max_substrings])) **Arguments** -- `separator` — The separator. [String](../../sql-reference/data-types/string.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). -- `max_substrings` — An optional `Int64` defaulting to 0. 
When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. +- `separator` — The separator. [String](../../sql-reference/data-types/string.md). +- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** @@ -68,9 +70,9 @@ Returns an array of selected substrings. Empty substrings may be selected when: -- A non-empty separator occurs at the beginning or end of the string; -- There are multiple consecutive non-empty separators; -- The original string `s` is empty while the separator is not empty. +- A non-empty separator occurs at the beginning or end of the string; +- There are multiple consecutive non-empty separators; +- The original string `s` is empty while the separator is not empty. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). **Example** @@ -78,6 +80,8 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere SELECT splitByString(', ', '1, 2 3, 4,5, abcde'); ``` +Result: + ``` text ┌─splitByString(', ', '1, 2 3, 4,5, abcde')─┐ │ ['1','2 3','4,5','abcde']                 │ └───────────────────────────────────────────┘ ``` @@ -88,13 +92,15 @@ SELECT splitByString('', 'abcde'); ``` +Result: + ``` text ┌─splitByString('', 'abcde')─┐ │ ['a','b','c','d','e']      │ └────────────────────────────┘ ``` -## splitByRegexp(regexp, s[, max_substrings]) +## splitByRegexp Splits a string into substrings separated by a regular expression. It uses a regular expression string `regexp` as the separator. If the `regexp` is empty, it will split the string `s` into an array of single characters. If no match is found for this regular expression, the string `s` won't be split. **Syntax** @@ -106,25 +112,23 @@ splitByRegexp(regexp, s[, max_substrings])) **Arguments** -- `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). -- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. +- `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** Returns an array of selected substrings. Empty substrings may be selected when: -- A non-empty regular expression match occurs at the beginning or end of the string; -- There are multiple consecutive non-empty regular expression matches; -- The original string `s` is empty while the regular expression is not empty. +- A non-empty regular expression match occurs at the beginning or end of the string; +- There are multiple consecutive non-empty regular expression matches; +- The original string `s` is empty while the regular expression is not empty.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). **Example** -Query: - ``` sql SELECT splitByRegexp('\\d+', 'a12bc23de345f'); ``` @@ -137,8 +141,6 @@ Result: └────────────────────────────────────────┘ ``` -Query: - ``` sql SELECT splitByRegexp('', 'abcde'); ``` @@ -151,7 +153,7 @@ Result: └────────────────────────────┘ ``` -## splitByWhitespace(s[, max_substrings]) +## splitByWhitespace Splits a string into substrings separated by whitespace characters. Returns an array of selected substrings. **Syntax** ``` sql splitByWhitespace(s[, max_substrings]) ``` **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). -- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. +- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** @@ -180,13 +182,15 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-refere SELECT splitByWhitespace('  1!  a,  b.  '); ``` +Result: + ``` text ┌─splitByWhitespace('  1!  a,  b.  ')─┐ │ ['1!','a,','b.']                    │ └─────────────────────────────────────┘ ``` -## splitByNonAlpha(s[, max_substrings]) +## splitByNonAlpha Splits a string into substrings separated by whitespace and punctuation characters. Returns an array of selected substrings. **Syntax** ``` sql splitByNonAlpha(s[, max_substrings]) ``` **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md). -- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. +- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** @@ -221,23 +225,32 @@ SELECT splitByNonAlpha('  1!  a,  b.  '); └───────────────────────────────────┘ ``` -## arrayStringConcat(arr\[, separator\]) +## arrayStringConcat Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default. Returns the string. +**Syntax** + +```sql +arrayStringConcat(arr[, separator]) +``` + **Example** ``` sql SELECT arrayStringConcat(['12/05/2021', '12:50:00'], ' ') AS DateString; ``` + +Result: + ```text ┌─DateString──────────┐ │ 12/05/2021 12:50:00 │ └─────────────────────┘ ``` -## alphaTokens(s[, max_substrings]), splitByAlpha(s[, max_substrings]) +## alphaTokens Selects substrings of consecutive bytes from the ranges a-z and A-Z. Returns an array of substrings. **Syntax** ``` sql alphaTokens(s[, max_substrings]) -splitByAlpha(s[, max_substrings]) ``` +Alias: `splitByAlpha` + **Arguments** -- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
-- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. +- `s` — The string to split. [String](../../sql-reference/data-types/string.md). +- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible. **Returned value(s)** @@ -271,7 +285,7 @@ SELECT alphaTokens('abca1abc'); └─────────────────────────┘ ``` -## extractAllGroups(text, regexp) +## extractAllGroups Extracts all groups from non-overlapping substrings matched by a regular expression. **Syntax** ``` sql extractAllGroups(text, regexp) ``` **Arguments** -- `text` — [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). -- `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `text` — [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). +- `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md). **Returned values** -- If the function finds at least one matching group, it returns `Array(Array(String))` column, clustered by group_id (1 to N, where N is number of capturing groups in `regexp`). +- If the function finds at least one matching group, it returns an `Array(Array(String))` column, clustered by group_id (1 to N, where N is the number of capturing groups in `regexp`). -- If there is no matching group, returns an empty array. +- If there is no matching group, returns an empty array. Type: [Array](../data-types/array.md). **Example** -Query: - ``` sql SELECT extractAllGroups('abc=123, 8="hkl"', '("[^"]+"|\\w+)=("[^"]+"|\\w+)'); ``` @@ -312,7 +324,7 @@ Result: ## ngrams -Splits the UTF-8 string into n-grams of `ngramsize` symbols. +Splits a UTF-8 string into n-grams of `ngramsize` symbols. **Syntax** @@ -322,19 +334,17 @@ ngrams(string, ngramsize) **Arguments** -- `string` — String. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- `ngramsize` — The size of an n-gram. [UInt](../../sql-reference/data-types/int-uint.md). +- `string` — String. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `ngramsize` — The size of an n-gram. [UInt](../../sql-reference/data-types/int-uint.md). **Returned values** -- Array with n-grams. +- Array with n-grams. Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)). **Example** -Query: - ``` sql SELECT ngrams('ClickHouse', 3); ``` @@ -353,18 +363,16 @@ Splits a string into tokens using non-alphanumeric ASCII characters as separator **Arguments** -- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. +- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. **Returned value** -- The resulting array of tokens from input string. +- The resulting array of tokens from the input string. Type: [Array](../data-types/array.md).
**Example** -Query: - ``` sql SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens; ``` diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index f3c5b20f886..c543cda5ae2 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1,50 +1,46 @@ --- slug: /en/sql-reference/functions/string-functions -sidebar_position: 40 +sidebar_position: 170 sidebar_label: Strings --- # Functions for Working with Strings -:::note -Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [replacing](../../sql-reference/functions/string-replace-functions.md) in strings are described separately. -::: +Functions for [searching](string-search-functions.md) in strings and for [replacing](string-replace-functions.md) in strings are described separately. ## empty Checks whether the input string is empty. +A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. + +The function is also available for [arrays](array-functions.md#function-empty) and [UUIDs](uuid-functions.md#empty). + **Syntax** ``` sql empty(x) ``` -A string is considered non-empty if it contains at least one byte, even if this is a space or a null byte. - -The function also works for [arrays](array-functions.md#function-empty) or [UUID](uuid-functions.md#empty). **Arguments** -- `x` — Input value. [String](../data-types/string.md). +- `x` — Input value. [String](../data-types/string.md). **Returned value** -- Returns `1` for an empty string or `0` for a non-empty string. +- Returns `1` for an empty string or `0` for a non-empty string. Type: [UInt8](../data-types/int-uint.md). **Example** -Query: - ```sql SELECT empty(''); ``` Result: -```text +```result ┌─empty('')─┐ │         1 │ └───────────┘ @@ -54,37 +50,35 @@ Result: ## notEmpty Checks whether the input string is non-empty. +A string is considered non-empty if it contains at least one byte, even if this byte is a space or the null byte. + +The function is also available for [arrays](array-functions.md#function-notempty) and [UUIDs](uuid-functions.md#notempty). + **Syntax** ``` sql notEmpty(x) ``` -A string is considered non-empty if it contains at least one byte, even if this is a space or a null byte. - -The function also works for [arrays](array-functions.md#function-notempty) or [UUID](uuid-functions.md#notempty). **Arguments** -- `x` — Input value. [String](../data-types/string.md). +- `x` — Input value. [String](../data-types/string.md). **Returned value** -- Returns `1` for a non-empty string or `0` for an empty string string. +- Returns `1` for a non-empty string or `0` for an empty string. Type: [UInt8](../data-types/int-uint.md). **Example** -Query: - ```sql SELECT notEmpty('text'); ``` Result: -```text +```result ┌─notEmpty('text')─┐ │                1 │ └──────────────────┘ @@ -92,58 +86,51 @@ Result: ## length -Returns the length of a string in bytes (not in characters, and not in code points). -The result type is UInt64. +Returns the length of a string in bytes (not: in characters or Unicode code points). + The function also works for arrays. ## lengthUTF8 -Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it does not throw an exception). -The result type is UInt64.
+Returns the length of a string in Unicode code points (not: in bytes or characters). It assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -## char_length, CHAR_LENGTH - -Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it does not throw an exception). -The result type is UInt64. - -## character_length, CHARACTER_LENGTH - -Returns the length of a string in Unicode code points (not in characters), assuming that the string contains a set of bytes that make up UTF-8 encoded text. If this assumption is not met, it returns some result (it does not throw an exception). -The result type is UInt64. +Alias: +- `CHAR_LENGTH` +- `CHARACTER_LENGTH` ## leftPad -Pads the current string from the left with spaces or a specified string (multiple times, if needed) until the resulting string reaches the given length. Similarly to the MySQL `LPAD` function. +Pads a string from the left with spaces or with a specified string (multiple times, if needed) until the resulting string reaches the specified `length`. **Syntax** ``` sql -leftPad('string', 'length'[, 'pad_string']) +leftPad(string, length[, pad_string]) ``` +Alias: `LPAD` + **Arguments** -- `string` — Input string that needs to be padded. [String](../data-types/string.md). -- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters. -- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. +- `string` — Input string that should be padded. [String](../data-types/string.md). +- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is smaller than the input string length, then the input string is shortened to `length` characters. +- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. **Returned value** -- The resulting string of the given length. +- A left-padded string of the given length. Type: [String](../data-types/string.md). **Example** -Query: - ``` sql SELECT leftPad('abc', 7, '*'), leftPad('def', 7); ``` Result: -``` text +```result ┌─leftPad('abc', 7, '*')─┬─leftPad('def', 7)─┐ │ ****abc │ def │ └────────────────────────┴───────────────────┘ @@ -151,37 +138,35 @@ Result: ## leftPadUTF8 -Pads the current string from the left with spaces or a specified string (multiple times, if needed) until the resulting string reaches the given length. Similarly to the MySQL `LPAD` function. While in the [leftPad](#leftpad) function the length is measured in bytes, here in the `leftPadUTF8` function it is measured in code points. +Pads the string from the left with spaces or a specified string (multiple times, if needed) until the resulting string reaches the given length. Unlike [leftPad](#leftpad) which measures the string length in bytes, the string length is measured in code points. **Syntax** ``` sql -leftPadUTF8('string','length'[, 'pad_string']) +leftPadUTF8(string, length[, pad_string]) ``` **Arguments** -- `string` — Input string that needs to be padded. [String](../data-types/string.md).
-- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters. -- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. +- `string` — Input string that should be padded. [String](../data-types/string.md). +- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is smaller than the input string length, then the input string is shortened to `length` characters. +- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. **Returned value** -- The resulting string of the given length. +- A left-padded string of the given length. Type: [String](../data-types/string.md). **Example** -Query: - ``` sql SELECT leftPadUTF8('абвг', 7, '*'), leftPadUTF8('дежз', 7); ``` Result: -``` text +```result ┌─leftPadUTF8('абвг', 7, '*')─┬─leftPadUTF8('дежз', 7)─┐ │ ***абвг │ дежз │ └─────────────────────────────┴────────────────────────┘ @@ -189,37 +174,37 @@ Result: ## rightPad -Pads the current string from the right with spaces or a specified string (multiple times, if needed) until the resulting string reaches the given length. Similarly to the MySQL `RPAD` function. +Pads a string from the right with spaces or with a specified string (multiple times, if needed) until the resulting string reaches the specified `length`. **Syntax** ``` sql -rightPad('string', 'length'[, 'pad_string']) +rightPad(string, length[, pad_string]) ``` +Alias: `RPAD` + **Arguments** -- `string` — Input string that needs to be padded. [String](../data-types/string.md). -- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters. -- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. +- `string` — Input string that should be padded. [String](../data-types/string.md). +- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is smaller than the input string length, then the input string is shortened to `length` characters. +- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. **Returned value** -- The resulting string of the given length. +- A right-padded string of the given length. Type: [String](../data-types/string.md). **Example** -Query: - ``` sql SELECT rightPad('abc', 7, '*'), rightPad('abc', 7); ``` Result: -``` text +```result ┌─rightPad('abc', 7, '*')─┬─rightPad('abc', 7)─┐ │ abc**** │ abc │ └─────────────────────────┴────────────────────┘ @@ -227,81 +212,89 @@ Result: ## rightPadUTF8 -Pads the current string from the right with spaces or a specified string (multiple times, if needed) until the resulting string reaches the given length. Similarly to the MySQL `RPAD` function. While in the [rightPad](#rightpad) function the length is measured in bytes, here in the `rightPadUTF8` function it is measured in code points.
+Pads the string from the right with spaces or a specified string (multiple times, if needed) until the resulting string reaches the given length. Unlike [rightPad](#rightpad) which measures the string length in bytes, the string length is measured in code points. **Syntax** ``` sql -rightPadUTF8('string','length'[, 'pad_string']) +rightPadUTF8(string, length[, pad_string]) ``` **Arguments** -- `string` — Input string that needs to be padded. [String](../data-types/string.md). -- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is less than the input string length, then the input string is shortened to `length` characters. -- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. +- `string` — Input string that should be padded. [String](../data-types/string.md). +- `length` — The length of the resulting string. [UInt or Int](../data-types/int-uint.md). If the value is smaller than the input string length, then the input string is shortened to `length` characters. +- `pad_string` — The string to pad the input string with. [String](../data-types/string.md). Optional. If not specified, then the input string is padded with spaces. **Returned value** -- The resulting string of the given length. +- A right-padded string of the given length. Type: [String](../data-types/string.md). **Example** -Query: - ``` sql SELECT rightPadUTF8('абвг', 7, '*'), rightPadUTF8('абвг', 7); ``` Result: -``` text +```result ┌─rightPadUTF8('абвг', 7, '*')─┬─rightPadUTF8('абвг', 7)─┐ │ абвг*** │ абвг │ └──────────────────────────────┴─────────────────────────┘ ``` -## lower, lcase +## lower -Converts ASCII Latin symbols in a string to lowercase. +Converts the ASCII Latin symbols in a string to lowercase. -## upper, ucase +Alias: `lcase` -Converts ASCII Latin symbols in a string to uppercase. +## upper + +Converts the ASCII Latin symbols in a string to uppercase. + +Alias: `ucase` ## lowerUTF8 -Converts a string to lowercase, assuming the string contains a set of bytes that make up a UTF-8 encoded text. -It does not detect the language. E.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). +Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). + If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. -If the string contains a sequence of bytes that are not valid UTF-8, then the behavior is undefined. ## upperUTF8 -Converts a string to uppercase, assuming the string contains a set of bytes that make up a UTF-8 encoded text. -It does not detect the language. E.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). +Converts a string to uppercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I). + If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. 
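+ +**Example** + +A short example added here for illustration, assuming valid UTF-8 input (`ü` is a two-byte code point that uppercases to `Ü`): + +``` sql +SELECT upperUTF8('München') AS res; +``` + +Result: + +```result +┌─res─────┐ +│ MÜNCHEN │ +└─────────┘ +```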
-If the string contains a sequence of bytes that are not valid UTF-8, then the behavior is undefined. ## isValidUTF8 -Returns 1, if the set of bytes is valid UTF-8 encoded, otherwise 0. +Returns 1, if the set of bytes constitutes valid UTF-8-encoded text, otherwise 0. ## toValidUTF8 Replaces invalid UTF-8 characters by the `�` (U+FFFD) character. All running in a row invalid characters are collapsed into the one replacement character. +**Syntax** + ``` sql toValidUTF8(input_string) ``` **Arguments** -- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. +- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object. -Returned value: Valid UTF-8 string. +**Returned value** + +- A valid UTF-8 string. **Example** @@ -309,7 +302,7 @@ Returned value: Valid UTF-8 string. SELECT toValidUTF8('\x61\xF0\x80\x80\x80b'); ``` -``` text +```result ┌─toValidUTF8('a����b')─┐ │ a�b │ └───────────────────────┘ @@ -317,9 +310,7 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b'); ## repeat -Repeats a string as many times as specified and concatenates the replicated values as a single string. - -Alias: `REPEAT`. +Concatenates a string with itself as many times as specified. **Syntax** @@ -327,28 +318,28 @@ Alias: `REPEAT`. repeat(s, n) ``` +Alias: `REPEAT` + **Arguments** -- `s` — The string to repeat. [String](../../sql-reference/data-types/string.md). -- `n` — The number of times to repeat the string. [UInt or Int](../../sql-reference/data-types/int-uint.md). +- `s` — The string to repeat. [String](../../sql-reference/data-types/string.md). +- `n` — The number of times to repeat the string. [UInt or Int](../../sql-reference/data-types/int-uint.md). **Returned value** -The single string, which contains the string `s` repeated `n` times. If `n` \< 1, the function returns empty string. +The single string containing string `s` repeated `n` times. If `n` \< 1, the function returns an empty string. Type: `String`. **Example** -Query: - ``` sql SELECT repeat('abc', 10); ``` Result: -``` text +```result ┌─repeat('abc', 10)──────────────┐ │ abcabcabcabcabcabcabcabcabcabc │ └────────────────────────────────┘ @@ -356,31 +347,41 @@ Result: ## reverse -Reverses the string (as a sequence of bytes). +Reverses the sequence of bytes in a string. ## reverseUTF8 -Reverses a sequence of Unicode code points, assuming that the string contains a set of bytes representing a UTF-8 text. Otherwise, it does something else (it does not throw an exception). +Reverses a sequence of Unicode code points in a string. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. -## format(pattern, s0, s1, …) +## format -Formatting constant pattern with the string listed in the arguments. `pattern` is a simplified Python format pattern. Format string contains “replacement fields” surrounded by curly braces `{}`. Anything that is not contained in braces is considered literal text, which is copied unchanged to the output. If you need to include a brace character in the literal text, it can be escaped by doubling: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are treated as consequence numbers). +Formats the `pattern` string with the strings listed in the arguments, similar to formatting in Python. The pattern string can contain replacement fields surrounded by curly braces `{}`.
Anything not contained in braces is considered literal text and copied verbatim into the output. A literal brace character can be escaped by doubling it: `{{ '{{' }}` and `{{ '}}' }}`. Field names can be numbers (starting from zero) or empty (then they are implicitly given monotonically increasing numbers). + +**Syntax** + +```sql +format(pattern, s0, s1, …) +``` + +**Example** ``` sql SELECT format('{1} {0} {1}', 'World', 'Hello') ``` -``` text +```result ┌─format('{1} {0} {1}', 'World', 'Hello')─┐ │ Hello World Hello │ └─────────────────────────────────────────┘ ``` +With implicit numbers: + ``` sql SELECT format('{} {}', 'Hello', 'World') ``` -``` text +```result ┌─format('{} {}', 'Hello', 'World')─┐ │ Hello World │ └───────────────────────────────────┘ @@ -388,7 +389,7 @@ SELECT format('{} {}', 'Hello', 'World') ## concat -Concatenates the strings listed in the arguments, without a separator. +Concatenates the strings listed in the arguments without a separator. **Syntax** @@ -402,21 +403,19 @@ Values of type String or FixedString. **Returned values** -Returns the String that results from concatenating the arguments. +The String created by concatenating the arguments. -If any of argument values is `NULL`, `concat` returns `NULL`. +If any of the arguments is `NULL`, the function returns `NULL`. **Example** -Query: - ``` sql SELECT concat('Hello, ', 'World!'); ``` Result: -``` text +```result ┌─concat('Hello, ', 'World!')─┐ │ Hello, World! │ └─────────────────────────────┘ @@ -424,9 +423,9 @@ Result: ## concatAssumeInjective -Same as [concat](#concat), the difference is that you need to ensure that `concat(s1, s2, ...) → sn` is injective, it will be used for optimization of GROUP BY. +Like [concat](#concat) but assumes that `concat(s1, s2, ...) → sn` is injective. Can be used for optimization of GROUP BY. -The function is named “injective” if it always returns different result for different values of arguments. In other words: different arguments never yield identical result. +A function is called injective if it returns different results for different arguments. In other words: different arguments never produce an identical result. **Syntax** @@ -440,9 +439,9 @@ Values of type String or FixedString. **Returned values** -Returns the String that results from concatenating the arguments. +The String created by concatenating the arguments. -If any of argument values is `NULL`, `concatAssumeInjective` returns `NULL`. +If any of argument values is `NULL`, the function returns `NULL`. **Example** @@ -454,7 +453,7 @@ INSERT INTO key_val VALUES ('Hello, ','World',1), ('Hello, ','World',2), ('Hello SELECT * from key_val; ``` -``` text +```result ┌─key1────┬─key2─────┬─value─┐ │ Hello, │ World │ 1 │ │ Hello, │ World │ 2 │ @@ -463,15 +462,13 @@ SELECT * from key_val; └─────────┴──────────┴───────┘ ``` -Query: - ``` sql SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY concatAssumeInjective(key1, key2); ``` Result: -``` text +```result ┌─concat(key1, key2)─┬─sum(value)─┐ │ Hello, World! │ 3 │ │ Hello, World! │ 2 │ @@ -479,25 +476,88 @@ Result: └────────────────────┴────────────┘ ``` -## substring(s, offset, length), mid(s, offset, length), substr(s, offset, length) +## concatWithSeparator -Returns a substring starting with the byte from the ‘offset’ index that is ‘length’ bytes long. Character indexing starts from one (as in standard SQL). +Concatenates the given strings with a given separator. -## substringUTF8(s, offset, length) **Syntax** -The same as ‘substring’, but for Unicode code points.
Works under the assumption that the string contains a set of bytes representing a UTF-8 encoded text. If this assumption is not met, it returns some result (it does not throw an exception). +``` sql +concatWithSeparator(sep, expr1, expr2, expr3...) +``` -## appendTrailingCharIfAbsent(s, c) **Arguments** -If the ‘s’ string is non-empty and does not contain the ‘c’ character at the end, it appends the ‘c’ character to the end. +- sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- exprN — expression to be concatenated. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -## convertCharset(s, from, to) **Returned values** -Returns the string ‘s’ that was converted from the encoding in ‘from’ to the encoding in ‘to’. +The String created by concatenating the arguments. -## base58Encode(plaintext) +If any of the argument values is `NULL`, the function returns `NULL`. -Accepts a String and encodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using "Bitcoin" alphabet. +**Example** + +``` sql +SELECT concatWithSeparator('a', '1', '2', '3', '4') +``` + +Result: + +```result +┌─concatWithSeparator('a', '1', '2', '3', '4')─┐ +│ 1a2a3a4                                      │ +└──────────────────────────────────────────────┘ +``` + +## concatWithSeparatorAssumeInjective + +Like `concatWithSeparator` but assumes that `concatWithSeparator(sep, expr1, expr2, expr3...) → result` is injective. Can be used for optimization of GROUP BY. + +A function is called injective if it returns different results for different arguments. In other words: different arguments never produce an identical result. + +## substring + +Returns a substring with `length` many bytes, starting at the byte at index `offset`. Character indexing starts from 1. + +**Syntax** + +```sql +substring(s, offset, length) +``` + +Alias: +- `substr` +- `mid` + +## substringUTF8 + +Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. + +## appendTrailingCharIfAbsent + +Appends character `c` to string `s` if `s` is non-empty and does not end with character `c`. + +**Syntax** + +```sql +appendTrailingCharIfAbsent(s, c) +``` + +## convertCharset + +Returns string `s` converted from the encoding `from` to encoding `to`. + +**Syntax** + +```sql +convertCharset(s, from, to) +``` + +## base58Encode + +Encodes a String using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) in the "Bitcoin" alphabet. **Syntax** @@ -511,117 +571,107 @@ base58Encode(plaintext) **Arguments** **Returned value** -- A string containing encoded value of 1st argument. +- A string containing the encoded value of the argument. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT base58Encode('Encoded'); ``` Result: -```text + +```result ┌─base58Encode('Encoded')─┐ │ 3dc8KtHrwM │ └─────────────────────────┘ ``` -## base58Decode(encoded_text) +## base58Decode Accepts a String and decodes it using [Base58](https://tools.ietf.org/id/draft-msporny-base58-01.html) encoding scheme using "Bitcoin" alphabet. **Syntax** ```sql -base58Decode(encoded_text) +base58Decode(encoded) ``` **Arguments** -- `encoded_text` — [String](../../sql-reference/data-types/string.md) column or constant.
If the string is not a valid base58-encoded value, an exception is thrown. +- `encoded` — [String](../../sql-reference/data-types/string.md) column or constant. If the string is not a valid Base58-encoded value, an exception is thrown. **Returned value** -- A string containing decoded value of 1st argument. +- A string containing the decoded value of the argument. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT base58Decode('3dc8KtHrwM'); ``` Result: -```text + +```result ┌─base58Decode('3dc8KtHrwM')─┐ │ Encoded │ └────────────────────────────┘ ``` -## tryBase58Decode(s) +## tryBase58Decode -Similar to base58Decode, but returns an empty string in case of error. +Like `base58Decode` but returns an empty string in case of error. -## base64Encode(s) +## base64Encode -Encodes ‘s’ FixedString or String into base64. +Encodes a String or FixedString as base64. Alias: `TO_BASE64`. -## base64Decode(s) +## base64Decode -Decode base64-encoded FixedString or String ‘s’ into original string. In case of failure raises an exception. +Decodes a base64-encoded String or FixedString. Throws an exception in case of error. Alias: `FROM_BASE64`. -## tryBase64Decode(s) +## tryBase64Decode -Similar to base64Decode, but returns an empty string in case of error. +Like `base64Decode` but returns an empty string in case of error. -## endsWith(s, suffix) +## endsWith -Returns whether to end with the specified suffix. Returns 1 if the string ends with the specified suffix, otherwise it returns 0. +Returns whether string `str` ends with `suffix`. -## startsWith(str, prefix) +**Syntax** -Returns 1 whether string starts with the specified prefix, otherwise it returns 0. +```sql +endsWith(str, suffix) +``` + +## startsWith + +Returns whether string `str` starts with `prefix`. + +**Syntax** + +```sql +startsWith(str, prefix) +``` + +**Example** ``` sql SELECT startsWith('Spider-Man', 'Spi'); ``` -**Returned values** - -- 1, if the string starts with the specified prefix. -- 0, if the string does not start with the specified prefix. - -**Example** - -Query: - -``` sql -SELECT startsWith('Hello, world!', 'He'); -``` - -Result: - -``` text -┌─startsWith('Hello, world!', 'He')─┐ -│ 1 │ -└───────────────────────────────────┘ -``` - ## trim -Removes all specified characters from the start or end of a string. -By default removes all consecutive occurrences of common whitespace (ASCII character 32) from both ends of a string. +Removes the specified characters from the start or end of a string. If not specified otherwise, the function removes whitespace (ASCII-character 32). **Syntax** @@ -631,26 +681,24 @@ trim([[LEADING|TRAILING|BOTH] trim_character FROM] input_string) **Arguments** -- `trim_character` — Specified characters for trim. [String](../../sql-reference/data-types/string.md). -- `input_string` — String for trim. [String](../../sql-reference/data-types/string.md). +- `trim_character` — Specified characters for trim. [String](../../sql-reference/data-types/string.md). +- `input_string` — String for trim. [String](../../sql-reference/data-types/string.md). **Returned value** -A string without leading and (or) trailing specified characters. +A string without leading and/or trailing specified characters. Type: `String`. **Example** -Query: - ``` sql SELECT trim(BOTH ' ()' FROM '( Hello, world! )'); ``` Result: -``` text +```result ┌─trim(BOTH ' ()' FROM '( Hello, world! )')─┐ │ Hello, world! 
│ └───────────────────────────────────────────────┘ @@ -658,7 +706,7 @@ Result: ## trimLeft -Removes all consecutive occurrences of common whitespace (ASCII character 32) from the beginning of a string. It does not remove other kinds of whitespace characters (tab, no-break space, etc.). +Removes the consecutive occurrences of whitespace (ASCII-character 32) from the start of a string. **Syntax** @@ -670,7 +718,7 @@ Alias: `ltrim(input_string)`. **Arguments** -- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). +- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). **Returned value** @@ -680,15 +728,13 @@ Type: `String`. **Example** -Query: - ``` sql SELECT trimLeft(' Hello, world! '); ``` Result: -``` text +```result ┌─trimLeft(' Hello, world! ')─┐ │ Hello, world! │ └─────────────────────────────────────┘ @@ -696,7 +742,7 @@ Result: ## trimRight -Removes all consecutive occurrences of common whitespace (ASCII character 32) from the end of a string. It does not remove other kinds of whitespace characters (tab, no-break space, etc.). +Removes the consecutive occurrences of whitespace (ASCII-character 32) from the end of a string. **Syntax** @@ -708,7 +754,7 @@ Alias: `rtrim(input_string)`. **Arguments** -- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). +- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). **Returned value** @@ -718,15 +764,13 @@ Type: `String`. **Example** -Query: - ``` sql SELECT trimRight(' Hello, world! '); ``` Result: -``` text +```result ┌─trimRight(' Hello, world! ')─┐ │ Hello, world! │ └──────────────────────────────────────┘ @@ -734,7 +778,7 @@ Result: ## trimBoth -Removes all consecutive occurrences of common whitespace (ASCII character 32) from both ends of a string. It does not remove other kinds of whitespace characters (tab, no-break space, etc.). +Removes the consecutive occurrences of whitespace (ASCII-character 32) from both ends of a string. **Syntax** @@ -746,7 +790,7 @@ Alias: `trim(input_string)`. **Arguments** -- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). +- `input_string` — string to trim. [String](../../sql-reference/data-types/string.md). **Returned value** @@ -756,33 +800,31 @@ Type: `String`. **Example** -Query: - ``` sql SELECT trimBoth(' Hello, world! '); ``` Result: -``` text +```result ┌─trimBoth(' Hello, world! ')─┐ │ Hello, world! │ └─────────────────────────────────────┘ ``` -## CRC32(s) +## CRC32 -Returns the CRC32 checksum of a string, using CRC-32-IEEE 802.3 polynomial and initial value `0xffffffff` (zlib implementation). +Returns the CRC32 checksum of a string using CRC-32-IEEE 802.3 polynomial and initial value `0xffffffff` (zlib implementation). The result type is UInt32. -## CRC32IEEE(s) +## CRC32IEEE Returns the CRC32 checksum of a string, using CRC-32-IEEE 802.3 polynomial. The result type is UInt32. -## CRC64(s) +## CRC64 Returns the CRC64 checksum of a string, using CRC-64-ECMA polynomial. @@ -800,25 +842,23 @@ normalizeQuery(x) **Arguments** -- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). +- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). **Returned value** -- Sequence of characters with placeholders. +- Sequence of characters with placeholders. Type: [String](../../sql-reference/data-types/string.md). 
**Example** -Query: - ``` sql SELECT normalizeQuery('[1, 2, 3, x]') AS query; ``` Result: -``` text +```result ┌─query────┐ │ [?.., x] │ └──────────┘ @@ -826,7 +866,7 @@ Result: ## normalizedQueryHash -Returns identical 64bit hash values without the values of literals for similar queries. It helps to analyze query log. +Returns identical 64bit hash values without the values of literals for similar queries. Can be helpful to analyze query log. **Syntax** @@ -836,25 +876,23 @@ normalizedQueryHash(x) **Arguments** -- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). +- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md). **Returned value** -- Hash value. +- Hash value. Type: [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges). **Example** -Query: - ``` sql SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`') AS res; ``` Result: -``` text +```result ┌─res─┐ │ 1 │ └─────┘ @@ -862,7 +900,7 @@ Result: ## normalizeUTF8NFC -Converts a string to [NFC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string contains a set of bytes that make up a UTF-8 encoded text. +Converts a string to [NFC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string is valid UTF8-encoded text. **Syntax** @@ -872,25 +910,23 @@ normalizeUTF8NFC(words) **Arguments** -- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). **Returned value** -- String transformed to NFC normalization form. +- String transformed to NFC normalization form. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT length('â'), normalizeUTF8NFC('â') AS nfc, length(nfc) AS nfc_len; ``` Result: -``` text +```result ┌─length('â')─┬─nfc─┬─nfc_len─┐ │ 2 │ â │ 2 │ └─────────────┴─────┴─────────┘ @@ -898,7 +934,7 @@ Result: ## normalizeUTF8NFD -Converts a string to [NFD normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string contains a set of bytes that make up a UTF-8 encoded text. +Converts a string to [NFD normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string is valid UTF8-encoded text. **Syntax** @@ -908,25 +944,23 @@ normalizeUTF8NFD(words) **Arguments** -- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). **Returned value** -- String transformed to NFD normalization form. +- String transformed to NFD normalization form. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT length('â'), normalizeUTF8NFD('â') AS nfd, length(nfd) AS nfd_len; ``` Result: -``` text +```result ┌─length('â')─┬─nfd─┬─nfd_len─┐ │ 2 │ â │ 3 │ └─────────────┴─────┴─────────┘ @@ -934,7 +968,7 @@ Result: ## normalizeUTF8NFKC -Converts a string to [NFKC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string contains a set of bytes that make up a UTF-8 encoded text. +Converts a string to [NFKC normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string is valid UTF8-encoded text. 
**Syntax** @@ -944,25 +978,23 @@ normalizeUTF8NFKC(words) **Arguments** -- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). **Returned value** -- String transformed to NFKC normalization form. +- String transformed to NFKC normalization form. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT length('â'), normalizeUTF8NFKC('â') AS nfkc, length(nfkc) AS nfkc_len; ``` Result: -``` text +```result ┌─length('â')─┬─nfkc─┬─nfkc_len─┐ │ 2 │ â │ 2 │ └─────────────┴──────┴──────────┘ @@ -970,7 +1002,7 @@ Result: ## normalizeUTF8NFKD -Converts a string to [NFKD normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string contains a set of bytes that make up a UTF-8 encoded text. +Converts a string to [NFKD normalized form](https://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms), assuming the string is valid UTF8-encoded text. **Syntax** @@ -980,25 +1012,23 @@ normalizeUTF8NFKD(words) **Arguments** -- `words` — Input string that contains UTF-8 encoded text. [String](../../sql-reference/data-types/string.md). +- `words` — UTF8-encoded input string. [String](../../sql-reference/data-types/string.md). **Returned value** -- String transformed to NFKD normalization form. +- String transformed to NFKD normalization form. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT length('â'), normalizeUTF8NFKD('â') AS nfkd, length(nfkd) AS nfkd_len; ``` Result: -``` text +```result ┌─length('â')─┬─nfkd─┬─nfkd_len─┐ │ 2 │ â │ 3 │ └─────────────┴──────┴──────────┘ @@ -1006,9 +1036,10 @@ Result: ## encodeXMLComponent -Escapes characters to place string into XML text node or attribute. +Escapes characters with special meaning in XML such that they can afterwards be placed into an XML text node or attribute. -The following five XML predefined entities will be replaced: `<`, `&`, `>`, `"`, `'`. +The following characters are replaced: `<`, `&`, `>`, `"`, `'`. +Also see the [list of XML and HTML character entity references](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references). **Syntax** @@ -1018,18 +1049,16 @@ encodeXMLComponent(x) **Arguments** -- `x` — The sequence of characters. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../../sql-reference/data-types/string.md). **Returned value** -- The sequence of characters with escape characters. +- The escaped string. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT encodeXMLComponent('Hello, "world"!'); SELECT encodeXMLComponent('<123>'); @@ -1039,7 +1068,7 @@ SELECT encodeXMLComponent('\'foo\''); Result: -``` text +```result Hello, &quot;world&quot;! &lt;123&gt; &amp;clickhouse ``` @@ -1048,7 +1077,8 @@ Hello, &quot;world&quot;! ## decodeXMLComponent -Replaces XML predefined entities with characters. Predefined entities are `&quot;` `&amp;` `&apos;` `&gt;` `&lt;` +Un-escapes substrings with special meaning in XML. These substrings are: `&quot;` `&amp;` `&apos;` `&gt;` `&lt;` + This function also replaces numeric character references with Unicode characters. Both decimal (like `&#10003;`) and hexadecimal (`&#x2713;`) forms are supported. **Syntax** @@ -1059,18 +1089,16 @@ decodeXMLComponent(x) **Arguments** -- `x` — A sequence of characters. [String](../../sql-reference/data-types/string.md). +- `x` — An input string. [String](../../sql-reference/data-types/string.md).
**Returned value** -- The sequence of characters after replacement. +- The un-escaped string. Type: [String](../../sql-reference/data-types/string.md). **Example** -Query: - ``` sql SELECT decodeXMLComponent('&apos;foo&apos;'); SELECT decodeXMLComponent('&lt; &#x3A3; &gt;'); @@ -1078,25 +1106,20 @@ SELECT decodeXMLComponent('&lt; &#x3A3; &gt;'); Result: -``` text +```result 'foo' < Σ > ``` -**See Also** - -- [List of XML and HTML character entity references](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references) - - - ## extractTextFromHTML -A function to extract text from HTML or XHTML. -It does not necessarily 100% conform to any of the HTML, XML or XHTML standards, but the implementation is reasonably accurate and it is fast. The rules are the following: +This function extracts plain text from HTML or XHTML. -1. Comments are skipped. Example: `<!-- test -->`. Comment must end with `-->`. Nested comments are not possible. +It does not conform 100% to the HTML, XML or XHTML specification but the implementation is reasonably accurate and fast. The rules are the following: + +1. Comments are skipped. Example: `<!-- test -->`. Comment must end with `-->`. Nested comments are disallowed. Note: constructions like `<!-->` and `<!->` are not valid comments in HTML but they are skipped by other rules. -2. CDATA is pasted verbatim. Note: CDATA is XML/XHTML specific. But it is processed for "best-effort" approach. +2. CDATA is pasted verbatim. Note: CDATA is XML/XHTML-specific and processed on a "best-effort" basis. 3. `script` and `style` elements are removed with all their content. Note: it is assumed that closing tag cannot appear inside content. For example, in JS string literal has to be escaped like `"<\/script>"`. Note: comments and CDATA are possible inside `script` or `style` - then closing tags are not searched inside CDATA. Example: `<script><![CDATA[</script>]]></script>`. But they are still searched inside comments. Sometimes it becomes complicated: `<script>var x = "<!--"; </script> var y = "-->"; alert(x + y);</script>` Note: `script` and `style` can be the names of XML namespaces - then they are not treated like usual `script` or `style` elements. Example: `<script:a>Hello</script:a>`. @@ -1121,11 +1144,11 @@ extractTextFromHTML(x) **Arguments** -- `x` — input text. [String](../../sql-reference/data-types/string.md). +- `x` — input text. [String](../../sql-reference/data-types/string.md). **Returned value** -- Extracted text. +- Extracted text. Type: [String](../../sql-reference/data-types/string.md). **Example** The first example contains several tags and a comment and also shows whitespace processing. The second example shows `CDATA` and `script` tag processing. In the third example text is extracted from the full HTML response received by the [url](../../sql-reference/table-functions/url.md) function. -Query: - ``` sql SELECT extractTextFromHTML('
	<p>
	  A text <i>with</i><b>tags</b>. <!-- comments -->
	</p>
'); SELECT extractTextFromHTML('<![CDATA[The content within <b>CDATA</b>
]]> <script>alert("Script");</script>'); @@ -1145,54 +1166,145 @@ SELECT extractTextFromHTML(html) FROM url('http://www.donothingfor2minutes.com/' Result: -``` text +```result A text with tags . The content within <b>CDATA</b> Do Nothing for 2 Minutes 2:00   ``` -## ascii(s) {#ascii} -Returns the ASCII code point of the first character of str. The result type is Int32. +## ascii {#ascii} -If s is empty, the result is 0. If the first character is not an ASCII character or not part of the Latin-1 Supplement range of UTF-16, the result is undefined. +Returns the ASCII code point (as Int32) of the first character of string `s`. +If `s` is empty, the result is 0. If the first character is not an ASCII character or not part of the Latin-1 supplement range of UTF-16, the result is undefined. +**Syntax** +```sql +ascii(s) +```
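+ +**Example** + +A brief illustrative example; only the first character of the string determines the result (`'2'` has ASCII code 50): + +```sql +SELECT ascii('234'); +``` + +Result: + +```result +┌─ascii('234')─┐ +│           50 │ +└──────────────┘ +```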
-## concatWithSeparator +## soundex -Returns the concatenation strings separated by string separator. If any of the argument values is `NULL`, the function returns `NULL`. +Returns the [Soundex code](https://en.wikipedia.org/wiki/Soundex) of a string. **Syntax** ``` sql -concatWithSeparator(sep, expr1, expr2, expr3...) +soundex(val) ``` **Arguments** -- sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -- exprN — expression to be concatenated. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). -**Returned values** -- The concatenated String. + +- `val` - Input value. [String](../data-types/string.md) + +**Returned value** + +- The Soundex code of the input value. [String](../data-types/string.md) **Example** -Query: - ``` sql -SELECT concatWithSeparator('a', '1', '2', '3', '4') +SELECT soundex('aksel'); ``` Result: -``` text -┌─concatWithSeparator('a', '1', '2', '3', '4')─┐ -│ 1a2a3a4 │ -└───────────────────────────────────┘ +```result +┌─soundex('aksel')─┐ +│ A240 │ +└──────────────────┘ ``` -## concatWithSeparatorAssumeInjective -Same as concatWithSeparator, the difference is that you need to ensure that concatWithSeparator(sep, expr1, expr2, expr3...) → result is injective, it will be used for optimization of GROUP BY. +## extractKeyValuePairs -The function is named “injective” if it always returns different result for different values of arguments. In other words: different arguments never yield identical result. +Extracts key-value pairs from any string. The string does not need to be 100% structured in a key-value pair format. +It can contain noise (e.g. log files). The key-value pair format to be interpreted should be specified via function arguments. + +A key-value pair consists of a key followed by a `key_value_delimiter` and a value. Quoted keys and values are also supported. Key-value pairs must be separated by pair delimiters. +**Syntax** +``` sql +extractKeyValuePairs(data, [key_value_delimiter], [pair_delimiter], [quoting_character]) +``` + +**Arguments** +- `data` - String to extract key-value pairs from. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `key_value_delimiter` - Character to be used as delimiter between the key and the value. Defaults to `:`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `pair_delimiters` - Set of characters to be used as delimiters between pairs. Defaults to `\space`, `,` and `;`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- `quoting_character` - Character to be used as quoting character. Defaults to `"`. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). + +**Returned values** +- The extracted key-value pairs in a Map(String, String). + +**Examples** + +**Simple case** +``` sql +SELECT extractKeyValuePairs('name:neymar, age:31 team:psg,nationality:brazil') AS kv +``` + +Result: + +```result +┌─kv──────────────────────────────────────────────────────────────────────┐ +│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil'} │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +**Single quote as quoting character** +``` sql +SELECT extractKeyValuePairs('name:\'neymar\';\'age\':31;team:psg;nationality:brazil,last_key:last_value', ':', ';,', '\'') AS kv +``` + +Result: + +```result +┌─kv───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ {'name':'neymar','age':'31','team':'psg','nationality':'brazil','last_key':'last_value'} │ +└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +**Escape sequences without escape sequence support** +``` sql +SELECT extractKeyValuePairs('age:a\\x0A\\n\\0') AS kv +``` + +Result: + +```result +┌─kv─────────────────────┐ +│ {'age':'a\\x0A\\n\\0'} │ +└────────────────────────┘ +``` + +## extractKeyValuePairsWithEscaping + +Same as `extractKeyValuePairs` but with escaping support. + +Escape sequences supported: `\x`, `\N`, `\a`, `\b`, `\e`, `\f`, `\n`, `\r`, `\t`, `\v` and `\0`. +Non-standard escape sequences are returned as is (including the backslash) unless they are one of the following: +`\\`, `'`, `"`, `backtick`, `/`, `=` or ASCII control characters (c <= 31). + +This function will satisfy the use case where pre-escaping and post-escaping are not suitable. For instance, consider the following +input string: `a: "aaaa\"bbb"`. The expected output is: `a: aaaa\"bbb`. +- Pre-escaping: Pre-escaping the input will output: `a: "aaaa"bbb"` and `extractKeyValuePairs` will then output: `a: aaaa` +- Post-escaping: `extractKeyValuePairs` will output `a: aaaa\` and post-escaping will keep it as is. + +Leading escape sequences will be skipped in keys and will be considered invalid for values.
+ +**Escape sequences with escape sequence support turned on** +``` sql +SELECT extractKeyValuePairsWithEscaping('age:a\\x0A\\n\\0') AS kv +``` + +Result: + +```result +┌─kv────────────────┐ +│ {'age':'a\n\n\0'} │ +└───────────────────┘ +``` diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index 50e15f70f5d..56c527d734e 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -1,34 +1,54 @@ --- slug: /en/sql-reference/functions/string-replace-functions -sidebar_position: 42 +sidebar_position: 150 sidebar_label: Replacing in Strings --- -# Functions for Searching and Replacing in Strings +# Functions for Replacing in Strings -:::note -Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately. -::: +[General string functions](string-functions.md) and [functions for searching in strings](string-search-functions.md) are described separately. -## replaceOne(haystack, pattern, replacement) +## replaceOne -Replaces the first occurrence of the substring ‘pattern’ (if it exists) in ‘haystack’ by the ‘replacement’ string. -‘pattern’ and ‘replacement’ must be constants. +Replaces the first occurrence of the substring `pattern` in `haystack` by the `replacement` string. -## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement) +**Syntax** -Replaces all occurrences of the substring ‘pattern’ in ‘haystack’ by the ‘replacement’ string. +```sql +replaceOne(haystack, pattern, replacement) +``` -## replaceRegexpOne(haystack, pattern, replacement) +## replaceAll -Replaces the first occurrence of the substring matching the regular expression ‘pattern’ in ‘haystack‘ by the ‘replacement‘ string. -‘pattern‘ must be a constant [re2 regular expression](https://github.com/google/re2/wiki/Syntax). -‘replacement’ must be a plain constant string or a constant string containing substitutions `\0-\9`. +Replaces all occurrences of the substring `pattern` in `haystack` by the `replacement` string. + +**Syntax** + +```sql +replaceAll(haystack, pattern, replacement) +``` + +Alias: `replace`. + +## replaceRegexpOne + +Replaces the first occurrence of the substring matching the regular expression `pattern` (in [re2 syntax](https://github.com/google/re2/wiki/Syntax)) in `haystack` by the `replacement` string. + +`replacement` can contain substitutions `\0-\9`. Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match. -To use a verbatim `\` character in the ‘pattern‘ or ‘replacement‘ string, escape it using `\`. -Also keep in mind that string literals require an extra escaping. -Example 1. Converting ISO dates to American format: +To use a verbatim `\` character in the `pattern` or `replacement` strings, escape it using `\`. +Also keep in mind that string literals require extra escaping.
+ +**Syntax** + +```sql +replaceRegexpOne(haystack, pattern, replacement) +``` + +**Example** + +Converting ISO dates to American format: ``` sql SELECT DISTINCT @@ -39,6 +59,8 @@ LIMIT 7 FORMAT TabSeparated ``` +Result: + ``` text 2014-03-17 03/17/2014 2014-03-18 03/18/2014 @@ -49,81 +71,91 @@ FORMAT TabSeparated 2014-03-23 03/23/2014 ``` -Example 2. Copying a string ten times: +Copying a string ten times: ``` sql SELECT replaceRegexpOne('Hello, World!', '.*', '\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0') AS res ``` +Result: + ``` text ┌─res────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ │ Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World!Hello, World! │ └────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` -## replaceRegexpAll(haystack, pattern, replacement) +## replaceRegexpAll -Like ‘replaceRegexpOne‘, but replaces all occurrences of the pattern. Example: +Like `replaceRegexpOne` but replaces all occurrences of the pattern. + +Alias: `REGEXP_REPLACE`. + +**Example** ``` sql SELECT replaceRegexpAll('Hello, World!', '.', '\\0\\0') AS res ``` +Result: + ``` text ┌─res────────────────────────┐ │ HHeelllloo,, WWoorrlldd!! │ └────────────────────────────┘ ``` -As an exception, if a regular expression worked on an empty substring, the replacement is not made more than once. -Example: +As an exception, if a regular expression worked on an empty substring, the replacement is not made more than once, e.g.: ``` sql SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res ``` +Result: + ``` text ┌─res─────────────────┐ │ here: Hello, World! │ └─────────────────────┘ ``` -## regexpQuoteMeta(s) +## regexpQuoteMeta + +Adds a backslash before these characters with special meaning in regular expressions: `\0`, `\\`, `|`, `(`, `)`, `^`, `$`, `.`, `[`, `]`, `?`, `*`, `+`, `{`, `:`, `-`. -The function adds a backslash before some predefined characters in the string. -Predefined characters: `\0`, `\\`, `|`, `(`, `)`, `^`, `$`, `.`, `[`, `]`, `?`, `*`, `+`, `{`, `:`, `-`. This implementation slightly differs from re2::RE2::QuoteMeta. It escapes zero byte as `\0` instead of `\x00` and it escapes only required characters. -For more information, see the link: [RE2](https://github.com/google/re2/blob/master/re2/re2.cc#L473) +For more information, see [RE2](https://github.com/google/re2/blob/master/re2/re2.cc#L473) +**Syntax** -## translate(s, from, to) +```sql +regexpQuoteMeta(s) +``` -The function replaces characters in the string ‘s’ in accordance with one-to-one character mapping defined by ‘from’ and ‘to’ strings. ‘from’ and ‘to’ must be constant ASCII strings of the same size. Non-ASCII characters in the original string are not modified. +## translate -Example: +Replaces characters in the string `s` using a one-to-one character mapping defined by `from` and `to` strings. `from` and `to` must be constant ASCII strings of the same size. Non-ASCII characters in the original string are not modified. + +**Syntax** + +```sql +translate(s, from, to) +``` + +**Example** ``` sql SELECT translate('Hello, World!', 'delor', 'DELOR') AS res ``` +Result: + ``` text ┌─res───────────┐ │ HELLO, WORLD! │ └───────────────┘ ``` -## translateUTF8(string, from, to) +## translateUTF8 -Similar to previous function, but works with UTF-8 arguments. 
‘from’ and ‘to’ must be valid constant UTF-8 strings of the same size. - -Example: - -``` sql -SELECT translateUTF8('Hélló, Wórld¡', 'óé¡', 'oe!') AS res -``` - -``` text -┌─res───────────┐ -│ Hello, World! │ -└───────────────┘ -``` +Like [translate](#translate) but assumes `s`, `from` and `to` are UTF-8 encoded strings. diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 38ccb43cbc9..3d8f89f7295 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -1,24 +1,26 @@ --- slug: /en/sql-reference/functions/string-search-functions -sidebar_position: 41 +sidebar_position: 160 sidebar_label: Searching in Strings --- # Functions for Searching in Strings -The search is case-sensitive by default in all these functions. There are separate variants for case insensitive search. +All functions in this section search case-sensitively by default. Case-insensitive search is usually provided by separate function variants. +Note that case-insensitive search follows the lowercase-uppercase rules of the English language. E.g. the uppercased `i` in the English language is +`I` whereas in the Turkish language it is `İ` - results for languages other than English may be unexpected. -:::note -Functions for [replacing](../../sql-reference/functions/string-replace-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately. -::: +Functions in this section also assume that the searched string and the search string are single-byte encoded text. If this assumption is +violated, no exception is thrown and results are undefined. Search with UTF-8 encoded strings is usually provided by separate function +variants. Likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the +results are undefined. Note that no automatic Unicode normalization is performed; you can use the +[normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. -## position(haystack, needle), locate(haystack, needle) +[General string functions](string-functions.md) and [functions for replacing in strings](string-replace-functions.md) are described separately. -Searches for the substring `needle` in the string `haystack`. +## position -Returns the position (in bytes) of the found substring in the string, starting from 1. - -For a case-insensitive search, use the function [positionCaseInsensitive](#positioncaseinsensitive). +Returns the position (in bytes, starting at 1) of a substring `needle` in a string `haystack`. **Syntax** @@ -26,35 +28,33 @@ position(haystack, needle[, start_pos]) ``` -``` sql -position(needle IN haystack) -``` - -Alias: `locate(haystack, needle[, start_pos])`. - -:::note -Syntax of `position(needle IN haystack)` provides SQL-compatibility, the function works the same way as to `position(haystack, needle)`. -::: +Alias: +- `position(needle IN haystack)` +- `locate(haystack, needle[, start_pos])` **Arguments** -- `haystack` — String, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `start_pos` – Position of the first character in the string to start search. [UInt](../../sql-reference/data-types/int-uint.md). Optional. +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional. **Returned values** -- Starting position in bytes (counting from 1), if substring was found. -- 0, if the substring was not found. +- Starting position in bytes and counting from 1, if the substring was found. +- 0, if the substring was not found. + +If substring `needle` is empty, these rules apply: +- if no `start_pos` was specified: return `1` +- if `start_pos = 0`: return `1` +- if `start_pos >= 1` and `start_pos <= length(haystack) + 1`: return `start_pos` +- otherwise: return `0` + +The same rules also apply to functions `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8` Type: `Integer`. **Examples** -The phrase “Hello, world!” contains a set of bytes representing a single-byte encoded text. The function returns some expected result: - -Query: - ``` sql SELECT position('Hello, world!', '!'); ``` @@ -67,6 +67,8 @@ Result: └────────────────────────────────┘ ``` +Example with `start_pos` argument: + ``` sql SELECT position('Hello, world!', 'o', 1), @@ -79,29 +81,21 @@ SELECT └───────────────────────────────────┴───────────────────────────────────┘ ``` -The same phrase in Russian contains characters which can’t be represented using a single byte. The function returns some unexpected result (use [positionUTF8](#positionutf8) function for multi-byte encoded text): +Example for `needle IN haystack` syntax: -Query: - -``` sql -SELECT position('Привет, мир!', '!'); +```sql +SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s); ``` Result: -``` text -┌─position('Привет, мир!', '!')─┐ -│ 21 │ -└───────────────────────────────┘ +```text +┌─equals(6, position(s, '/'))─┐ +│ 1 │ +└─────────────────────────────┘ ``` -If argument `needle` is empty the following rules apply: -- if no `start_pos` was specified: return `1` -- if `start_pos = 0`: return `1` -- if `start_pos >= 1` and `start_pos <= length(haystack) + 1`: return `start_pos` -- otherwise: return `0` - -The same rules also apply to functions `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8` +Examples with empty `needle` substring: ``` sql SELECT @@ -120,223 +114,59 @@ SELECT └─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┘ ``` - -**Examples for POSITION(needle IN haystack) syntax** - -Query: - -```sql -SELECT 3 = position('c' IN 'abc'); -``` - -Result: - -```text -┌─equals(3, position('abc', 'c'))─┐ -│ 1 │ -└─────────────────────────────────┘ -``` - -Query: - -```sql -SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s); -``` - -Result: - -```text -┌─equals(6, position(s, '/'))─┐ -│ 1 │ -└─────────────────────────────┘ -``` - ## positionCaseInsensitive -The same as [position](#position) returns the position (in bytes) of the found substring in the string, starting from 1. Use the function for a case-insensitive search. - -Works under the assumption that the string contains a set of bytes representing a single-byte encoded text. 
If this assumption is not met and a character can’t be represented using a single byte, the function does not throw an exception and returns some unexpected result. If character can be represented using two bytes, it will use two bytes and so on.
-
-**Syntax**
-
-``` sql
-positionCaseInsensitive(haystack, needle[, start_pos])
-```
-
-**Arguments**
-
-- `haystack` — String, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `start_pos` — Optional parameter, position of the first character in the string to start search. [UInt](../../sql-reference/data-types/int-uint.md).
-
-**Returned values**
-
-- Starting position in bytes (counting from 1), if substring was found.
-- 0, if the substring was not found.
-
-Type: `Integer`.
-
-**Example**
-
-Query:
-
-``` sql
-SELECT positionCaseInsensitive('Hello, world!', 'hello');
-```
-
-Result:
-
-``` text
-┌─positionCaseInsensitive('Hello, world!', 'hello')─┐
-│                                                 1 │
-└───────────────────────────────────────────────────┘
-```
+Like [position](#position) but searches case-insensitively.
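+
+**Example**
+
+The case of the needle does not affect the result:
+
+``` sql
+SELECT positionCaseInsensitive('Hello, world!', 'hello');
+```
+
+Result:
+
+``` text
+┌─positionCaseInsensitive('Hello, world!', 'hello')─┐
+│                                                 1 │
+└───────────────────────────────────────────────────┘
+```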
 
 ## positionUTF8
 
-Returns the position (in Unicode points) of the found substring in the string, starting from 1.
-
-Works under the assumption that the string contains a set of bytes representing a UTF-8 encoded text. If this assumption is not met, the function does not throw an exception and returns some unexpected result. If character can be represented using two Unicode points, it will use two and so on.
-
-For a case-insensitive search, use the function [positionCaseInsensitiveUTF8](#positioncaseinsensitiveutf8).
-
-**Syntax**
-
-``` sql
-positionUTF8(haystack, needle[, start_pos])
-```
-
-**Arguments**
-
-- `haystack` — String, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `start_pos` — Optional parameter, position of the first character in the string to start search. [UInt](../../sql-reference/data-types/int-uint.md)
-
-**Returned values**
-
-- Starting position in Unicode points (counting from 1), if substring was found.
-- 0, if the substring was not found.
-
-Type: `Integer`.
+Like [position](#position) but assumes `haystack` and `needle` are UTF-8 encoded strings.
 
 **Examples**
 
-The phrase “Hello, world!” in Russian contains a set of Unicode points representing a single-point encoded text. The function returns some expected result:
-
-Query:
+Function `positionUTF8` correctly counts character `ö` (represented by two bytes) as a single Unicode codepoint:
 
 ``` sql
-SELECT positionUTF8('Привет, мир!', '!');
+SELECT positionUTF8('Motörhead', 'r');
 ```
 
 Result:
 
 ``` text
-┌─positionUTF8('Привет, мир!', '!')─┐
-│                                12 │
-└───────────────────────────────────┘
-```
-
-The phrase “Salut, étudiante!”, where character `é` can be represented using a one point (`U+00E9`) or two points (`U+0065U+0301`) the function can be returned some unexpected result:
-
-Query for the letter `é`, which is represented one Unicode point `U+00E9`:
-
-``` sql
-SELECT positionUTF8('Salut, étudiante!', '!');
-```
-
-Result:
-
-``` text
-┌─positionUTF8('Salut, étudiante!', '!')─┐
-│                                     17 │
-└────────────────────────────────────────┘
-```
-
-Query for the letter `é`, which is represented two Unicode points `U+0065U+0301`:
-
-``` sql
-SELECT positionUTF8('Salut, étudiante!', '!');
-```
-
-Result:
-
-``` text
-┌─positionUTF8('Salut, étudiante!', '!')─┐
-│                                     18 │
-└────────────────────────────────────────┘
+┌─positionUTF8('Motörhead', 'r')─┐
+│                              5 │
+└────────────────────────────────┘
 ```
 
 ## positionCaseInsensitiveUTF8
 
-The same as [positionUTF8](#positionutf8), but is case-insensitive. Returns the position (in Unicode points) of the found substring in the string, starting from 1.
-
-Works under the assumption that the string contains a set of bytes representing a UTF-8 encoded text. If this assumption is not met, the function does not throw an exception and returns some unexpected result. If character can be represented using two Unicode points, it will use two and so on.
-
-**Syntax**
-
-``` sql
-positionCaseInsensitiveUTF8(haystack, needle[, start_pos])
-```
-
-**Arguments**
-
-- `haystack` — String, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `start_pos` — Optional parameter, position of the first character in the string to start search. [UInt](../../sql-reference/data-types/int-uint.md)
-
-**Returned value**
-
-- Starting position in Unicode points (counting from 1), if substring was found.
-- 0, if the substring was not found.
-
-Type: `Integer`.
-
-**Example**
-
-Query:
-
-``` sql
-SELECT positionCaseInsensitiveUTF8('Привет, мир!', 'Мир');
-```
-
-Result:
-
-``` text
-┌─positionCaseInsensitiveUTF8('Привет, мир!', 'Мир')─┐
-│                                                  9 │
-└────────────────────────────────────────────────────┘
-```
+Like [positionUTF8](#positionutf8) but searches case-insensitively.
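+
+**Example**
+
+The case of the UTF-8 needle does not affect the result:
+
+``` sql
+SELECT positionCaseInsensitiveUTF8('Привет, мир!', 'Мир');
+```
+
+Result:
+
+``` text
+┌─positionCaseInsensitiveUTF8('Привет, мир!', 'Мир')─┐
+│                                                  9 │
+└────────────────────────────────────────────────────┘
+```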
 
 ## multiSearchAllPositions
 
-The same as [position](../../sql-reference/functions/string-search-functions.md#position) but returns `Array` of positions (in bytes) of the found corresponding substrings in the string. Positions are indexed starting from 1.
+Like [position](#position) but returns an array of positions (in bytes, starting at 1) for multiple `needle` substrings in a `haystack` string.
 
-The search is performed on sequences of bytes without respect to string encoding and collation.
-
-- For case-insensitive ASCII search, use the function `multiSearchAllPositionsCaseInsensitive`.
-- For search in UTF-8, use the function [multiSearchAllPositionsUTF8](#multiSearchAllPositionsUTF8).
-- For case-insensitive UTF-8 search, use the function multiSearchAllPositionsCaseInsensitiveUTF8.
+:::note
+All `multiSearch*()` functions only support up to 2<sup>8</sup> needles.
+:::
 
 **Syntax**
 
 ``` sql
-multiSearchAllPositions(haystack, [needle1, needle2, ..., needlen])
+multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN])
 ```
 
 **Arguments**
 
-- `haystack` — String, in which substring will to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md).
 
 **Returned values**
 
-- Array of starting positions in bytes (counting from 1), if the corresponding substring was found and 0 if not found.
+- Array of starting positions in bytes, counting from 1, where each element is 0 if the corresponding substring was not found.
 
 **Example**
 
-Query:
-
 ``` sql
 SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']);
 ```
@@ -351,103 +181,172 @@ Result:
 
 ## multiSearchAllPositionsUTF8
 
-See `multiSearchAllPositions`.
+Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings.
 
-## multiSearchFirstPosition(haystack, \[needle1, needle2, …, needlen\])
+## multiSearchFirstPosition
 
-The same as `position` but returns the leftmost offset of the string `haystack` that is matched to some of the needles.
+Like `position` but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings.
 
-For a case-insensitive search or/and in UTF-8 format use functions `multiSearchFirstPositionCaseInsensitive, multiSearchFirstPositionUTF8, multiSearchFirstPositionCaseInsensitiveUTF8`.
+Functions `multiSearchFirstPositionCaseInsensitive`, `multiSearchFirstPositionUTF8` and `multiSearchFirstPositionCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
 
-## multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\])
+**Syntax**
+
+```sql
+multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN])
+```
+
+## multiSearchFirstIndex
 
 Returns the index `i` (starting from 1) of the leftmost found needlei in the string `haystack` and 0 otherwise.
 
-For a case-insensitive search or/and in UTF-8 format use functions `multiSearchFirstIndexCaseInsensitive, multiSearchFirstIndexUTF8, multiSearchFirstIndexCaseInsensitiveUTF8`.
+Functions `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8` and `multiSearchFirstIndexCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
 
-## multiSearchAny(haystack, \[needle1, needle2, …, needlen\])
+**Syntax**
+
+```sql
+multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN])
+```
+
+## multiSearchAny
 
 Returns 1, if at least one string needlei matches the string `haystack` and 0 otherwise.
 
-For a case-insensitive search or/and in UTF-8 format use functions `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, multiSearchAnyCaseInsensitiveUTF8`.
+Functions `multiSearchAnyCaseInsensitive`, `multiSearchAnyUTF8` and `multiSearchAnyCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
 
-:::note
-In all `multiSearch*` functions the number of needles should be less than 2<sup>8</sup> because of implementation specification.
-:::
+**Syntax**
-## match(haystack, pattern), haystack REGEXP pattern operator
+
+```sql
+multiSearchAny(haystack, [needle1, needle2, ..., needleN])
+```
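+
+**Example**
+
+One found needle is enough for a match:
+
+``` sql
+SELECT multiSearchAny('ClickHouse', ['Click', 'House']);
+```
+
+Result:
+
+``` text
+┌─multiSearchAny('ClickHouse', ['Click', 'House'])─┐
+│                                                1 │
+└──────────────────────────────────────────────────┘
+```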
-Checks whether string `haystack` matches the regular expression `pattern`. The pattern is an [re2 regular expression](https://github.com/google/re2/wiki/Syntax) which has a more limited syntax than Perl regular expressions.
+## match
 
-Returns 1 in case of a match, and 0 otherwise.
+Returns whether string `haystack` matches the regular expression `pattern` in [re2 regular expression syntax](https://github.com/google/re2/wiki/Syntax).
 
-Matching is based on UTF-8, e.g. `.` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes. The regular expression must not contain null bytes.
-If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
-No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
+Matching is based on UTF-8, e.g. `.` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes. The regular
+expression must not contain null bytes. If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
 
 Unlike re2's default behavior, `.` matches line breaks. To disable this, prepend the pattern with `(?-s)`.
 
-For patterns to search for substrings in a string, it is better to use functions [like](#like) or [position](#position) since they work much faster.
+If you only want to search substrings in a string, you can use functions [like](#like) or [position](#position) instead; they work much faster than this function.
 
-## multiMatchAny(haystack, \[pattern1, pattern2, …, patternn\])
+**Syntax**
 
-The same as `match`, but returns 0 if none of the regular expressions are matched and 1 if any of the patterns matches. For patterns to search substrings in a string, it is better to use `multiSearchAny` since it works much faster.
+```sql
+match(haystack, pattern)
+```
+
+Alias: `haystack REGEXP pattern` (operator)
+
+## multiMatchAny
+
+Like `match` but returns 1 if at least one of the patterns matches and 0 otherwise.
 
 :::note
-Functions `multiMatchAny`, `multiMatchAnyIndex`, `multiMatchAllIndices` and their fuzzy equivalents (`multiFuzzyMatchAny`,
-`multiFuzzyMatchAnyIndex`, `multiFuzzyMatchAllIndices`) use the (Vectorscan)[https://github.com/VectorCamp/vectorscan] library. As such,
-they are only enabled if ClickHouse is compiled with support for vectorscan.
+Functions in the `multi[Fuzzy]Match*()` family use the [Vectorscan](https://github.com/VectorCamp/vectorscan) library. As such, they are only enabled if ClickHouse is compiled with support for vectorscan.
+
+To turn off all functions that use hyperscan, use setting `SET allow_hyperscan = 0;`.
+
+Due to restrictions of vectorscan, the length of the `haystack` string must be less than 2<sup>32</sup> bytes.
 
 Hyperscan is generally vulnerable to regular expression denial of service (ReDoS) attacks (e.g. see
 [here](https://www.usenix.org/conference/usenixsecurity22/presentation/turonova), [here](https://doi.org/10.1007/s10664-021-10033-1) and
-(here)[ https://doi.org/10.1145/3236024.3236027]. Users are adviced to check the provided patterns carefully.
+[here](https://doi.org/10.1145/3236024.3236027)). Users are advised to check the provided patterns carefully.
 :::
 
-## multiMatchAnyIndex(haystack, \[pattern1, pattern2, …, patternn\])
+If you only want to search multiple substrings in a string, you can use function [multiSearchAny](#multisearchany) instead; it works much faster than this function.
 
-The same as `multiMatchAny`, but returns any index that matches the haystack.
+**Syntax**
-## multiMatchAllIndices(haystack, \[pattern1, pattern2, …, patternn\])
+
+```sql
+multiMatchAny(haystack, [pattern1, pattern2, ..., patternN])
+```
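+
+**Example**
+
+A single matching pattern is enough (note that matching is case-sensitive):
+
+``` sql
+SELECT multiMatchAny('Hello, World!', ['hello', 'Wor.d']);
+```
+
+Result:
+
+``` text
+┌─multiMatchAny('Hello, World!', ['hello', 'Wor.d'])─┐
+│                                                  1 │
+└────────────────────────────────────────────────────┘
+```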
-The same as `multiMatchAny`, but returns the array of all indices that match the haystack in any order.
+
+## multiMatchAnyIndex
 
-## multiFuzzyMatchAny(haystack, distance, \[pattern1, pattern2, …, patternn\])
+Like `multiMatchAny` but returns any index that matches the haystack.
 
-The same as `multiMatchAny`, but returns 1 if any pattern matches the haystack within a constant [edit distance](https://en.wikipedia.org/wiki/Edit_distance). This function relies on the experimental feature of [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) library, and can be slow for some corner cases. The performance depends on the edit distance value and patterns used, but it's always more expensive compared to a non-fuzzy variants.
+**Syntax**
 
-## multiFuzzyMatchAnyIndex(haystack, distance, \[pattern1, pattern2, …, patternn\])
+```sql
+multiMatchAnyIndex(haystack, [pattern1, pattern2, ..., patternN])
+```
 
-The same as `multiFuzzyMatchAny`, but returns any index that matches the haystack within a constant edit distance.
+## multiMatchAllIndices
 
-## multiFuzzyMatchAllIndices(haystack, distance, \[pattern1, pattern2, …, patternn\])
+Like `multiMatchAny` but returns the array of all indices that match the haystack in any order.
 
-The same as `multiFuzzyMatchAny`, but returns the array of all indices in any order that match the haystack within a constant edit distance.
+**Syntax**
+
+```sql
+multiMatchAllIndices(haystack, [pattern1, pattern2, ..., patternN])
+```
+
+## multiFuzzyMatchAny
+
+Like `multiMatchAny` but returns 1 if any pattern matches the haystack within a constant [edit distance](https://en.wikipedia.org/wiki/Edit_distance). This function relies on an experimental feature of the [hyperscan](https://intel.github.io/hyperscan/dev-reference/compilation.html#approximate-matching) library and can be slow for some corner cases. The performance depends on the edit distance value and the patterns used, but it is always more expensive than the non-fuzzy variants.
 
 :::note
-`multiFuzzyMatch*` functions do not support UTF-8 regular expressions, and such expressions are treated as bytes because of hyperscan restriction.
+The `multiFuzzyMatch*()` function family does not support UTF-8 regular expressions (it treats them as a sequence of bytes) due to restrictions of hyperscan.
 :::
 
-:::note
-To turn off all functions that use hyperscan, use setting `SET allow_hyperscan = 0;`.
-:::
+**Syntax**
 
-## extract(haystack, pattern)
+```sql
+multiFuzzyMatchAny(haystack, distance, [pattern1, pattern2, ..., patternN])
+```
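+
+**Example**
+
+An illustrative sketch: with `distance = 1`, a pattern that is one edit away from a substring of the haystack (here, one missing character) still matches:
+
+``` sql
+SELECT multiFuzzyMatchAny('ClickHouse', 1, ['ClckHouse']);
+```
+
+Result:
+
+``` text
+┌─multiFuzzyMatchAny('ClickHouse', 1, ['ClckHouse'])─┐
+│                                                  1 │
+└────────────────────────────────────────────────────┘
+```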
-Extracts a fragment of a string using a regular expression. If ‘haystack’ does not match the ‘pattern’ regex, an empty string is returned. If the regex does not contain subpatterns, it takes the fragment that matches the entire regex. Otherwise, it takes the fragment that matches the first subpattern.
+## multiFuzzyMatchAnyIndex
 
-## extractAll(haystack, pattern)
+Like `multiFuzzyMatchAny` but returns any index that matches the haystack within a constant edit distance.
 
-Extracts all the fragments of a string using a regular expression. If ‘haystack’ does not match the ‘pattern’ regex, an empty string is returned. Returns an array of strings consisting of all matches to the regex. In general, the behavior is the same as the ‘extract’ function (it takes the first subpattern, or the entire expression if there isn’t a subpattern).
+**Syntax**
+
+```sql
+multiFuzzyMatchAnyIndex(haystack, distance, [pattern1, pattern2, ..., patternN])
+```
+
+## multiFuzzyMatchAllIndices
+
+Like `multiFuzzyMatchAny` but returns the array of all indices in any order that match the haystack within a constant edit distance.
+
+**Syntax**
+
+```sql
+multiFuzzyMatchAllIndices(haystack, distance, [pattern1, pattern2, ..., patternN])
+```
+
+## extract
+
+Extracts a fragment of a string using a regular expression. If `haystack` does not match the `pattern` regex, an empty string is returned.
+
+For regex without subpatterns, the function uses the fragment that matches the entire regex. Otherwise, it uses the fragment that matches the first subpattern.
+
+**Syntax**
+
+```sql
+extract(haystack, pattern)
+```
+
+## extractAll
+
+Extracts all fragments of a string using a regular expression. If `haystack` does not match the `pattern` regex, an empty array is returned.
+
+Returns an array of strings consisting of all matches of the regex.
+
+The behavior with respect to subpatterns is the same as in function `extract`.
+
+**Syntax**
+
+```sql
+extractAll(haystack, pattern)
+```
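+
+**Example**
+
+All numeric fragments can be extracted in one pass:
+
+``` sql
+SELECT extractAll('100-200-300', '\\d+');
+```
+
+Result:
+
+``` text
+┌─extractAll('100-200-300', '\\d+')─┐
+│ ['100','200','300']               │
+└───────────────────────────────────┘
+```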
 
 ## extractAllGroupsHorizontal
 
 Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where the first array includes all fragments matching the first group, the second array - matching the second group, etc.
 
-:::note
-`extractAllGroupsHorizontal` function is slower than [extractAllGroupsVertical](#extractallgroups-vertical).
-:::
+This function is slower than [extractAllGroupsVertical](#extractallgroups-vertical).
 
 **Syntax**
 
 ``` sql
 extractAllGroupsHorizontal(haystack, pattern)
 ```
 
 **Arguments**
 
-- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md).
-- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md).
+- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md).
+- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md).
 
 **Returned value**
 
-- Type: [Array](../../sql-reference/data-types/array.md).
+- Type: [Array](../../sql-reference/data-types/array.md).
 
 If `haystack` does not match the `pattern` regex, an array of empty arrays is returned.
 
 **Example**
 
-Query:
-
 ``` sql
 SELECT extractAllGroupsHorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
 ```
@@ -482,10 +379,6 @@ Result:
 └──────────────────────────────────────────────────────────────────────────────────────────┘
 ```
 
-**See Also**
-
-- [extractAllGroupsVertical](#extractallgroups-vertical)
-
 ## extractAllGroupsVertical
 
 Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where each array includes matching fragments from every group. Fragments are grouped in order of appearance in the `haystack`.
@@ -498,19 +391,17 @@ extractAllGroupsVertical(haystack, pattern)
 
 **Arguments**
 
-- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md).
-- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md).
+- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md).
+- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md).
 
 **Returned value**
 
-- Type: [Array](../../sql-reference/data-types/array.md).
+- Type: [Array](../../sql-reference/data-types/array.md).
 
 If `haystack` does not match the `pattern` regex, an empty array is returned.
 
 **Example**
 
-Query:
-
 ``` sql
 SELECT extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)');
 ```
@@ -523,119 +414,88 @@ Result:
 └────────────────────────────────────────────────────────────────────────────────────────┘
 ```
 
-**See Also**
+## like
 
-- [extractAllGroupsHorizontal](#extractallgroups-horizontal)
+Returns whether string `haystack` matches the LIKE expression `pattern`.
 
-## like(haystack, pattern), haystack LIKE pattern operator
+A LIKE expression can contain normal characters and the following metasymbols:
 
-Checks whether a string matches a LIKE expression.
-A LIKE expression contains a mix of normal characters and the following metasymbols:
-
-- `%` indicates an arbitrary number of arbitrary characters (including zero characters).
-
-- `_` indicates a single arbitrary character.
-
-- `\` is for escaping literals `%`, `_` and `\`.
+- `%` indicates an arbitrary number of arbitrary characters (including zero characters).
+- `_` indicates a single arbitrary character.
+- `\` is for escaping literals `%`, `_` and `\`.
 
 Matching is based on UTF-8, e.g. `_` matches the Unicode code point `¥` which is represented in UTF-8 using two bytes.
-If the haystack or the pattern are not valid UTF-8, then the behavior is undefined.
+
+If the haystack or the LIKE expression are not valid UTF-8, the behavior is undefined.
+
 No automatic Unicode normalization is performed, you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that.
 
-To match against literals `%`, `_` and `/` (which are LIKE metacharacters), prepend them with a backslash, i.e. `\%`, `\_` and `\\`.
-The backslash loses its special meaning, i.e. is interpreted literally, if it prepends a character different than `%`, `_` or `\`.
+To match against literal `%`, `_` and `\` (which are LIKE metacharacters), prepend them with a backslash: `\%`, `\_` and `\\`.
+The backslash loses its special meaning (i.e. is interpreted literally) if it prepends a character different than `%`, `_` or `\`.
 Note that ClickHouse requires backslashes in strings [to be quoted as well](../syntax.md#string), so you would actually need to write `\\%`, `\\_` and `\\\\`.
 
-For patterns of the form `%needle%`, the function is as fast as the `position` function.
-Other LIKE expressions are internally converted to a regular expression and executed with a performance similar to function `match`.
-
-## notLike(haystack, pattern), haystack NOT LIKE pattern operator
-
-The same thing as `like`, but negative.
-
-## ilike
-
-Case insensitive variant of [like](https://clickhouse.com/docs/en/sql-reference/functions/string-search-functions/#function-like) function. You can use `ILIKE` operator instead of the `ilike` function.
-
-The function ignores the language, e.g. for Turkish (i/İ), the result might be incorrect.
+For LIKE expressions of the form `%needle%`, the function is as fast as the `position` function.
+All other LIKE expressions are internally converted to a regular expression and executed with a performance similar to function `match`.
 
 **Syntax**
 
-``` sql
-ilike(haystack, pattern)
+```sql
+like(haystack, pattern)
 ```
 
-**Arguments**
+Alias: `haystack LIKE pattern` (operator)
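+
+**Example**
+
+The patterns follow the metasymbol rules described above:
+
+``` sql
+SELECT 'abc' LIKE 'a%', 'abc' LIKE '_b_', 'abc' LIKE 'c';
+```
+
+Result:
+
+``` text
+┌─like('abc', 'a%')─┬─like('abc', '_b_')─┬─like('abc', 'c')─┐
+│                 1 │                  1 │                0 │
+└───────────────────┴────────────────────┴──────────────────┘
+```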
-
-- `haystack` — Input string. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `pattern` — If `pattern` does not contain percent signs or underscores, then the `pattern` only represents the string itself. An underscore (`_`) in `pattern` stands for (matches) any single character. A percent sign (`%`) matches any sequence of zero or more characters.
+## notLike
 
-Some `pattern` examples:
+Like `like` but negates the result.
 
-``` text
-'abc' ILIKE 'abc'    true
-'abc' ILIKE 'a%'     true
-'abc' ILIKE '_b_'    true
-'abc' ILIKE 'c'      false
-```
+Alias: `haystack NOT LIKE pattern` (operator)
 
-**Returned values**
+## ilike
 
-- True, if the string matches `pattern`.
-- False, if the string does not match `pattern`.
+Like `like` but searches case-insensitively.
 
-**Example**
+Alias: `haystack ILIKE pattern` (operator)
 
-Input table:
+## notILike
 
-``` text
-┌─id─┬─name─────┬─days─┐
-│  1 │ January  │   31 │
-│  2 │ February │   29 │
-│  3 │ March    │   31 │
-│  4 │ April    │   30 │
-└────┴──────────┴──────┘
-```
+Like `ilike` but negates the result.
 
-Query:
+Alias: `haystack NOT ILIKE pattern` (operator)
 
-``` sql
-SELECT * FROM Months WHERE ilike(name, '%j%');
-```
+## ngramDistance
 
-Result:
+Calculates the 4-gram distance between a `haystack` string and a `needle` string. For that, it counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities. Returns a Float32 between 0 and 1. The smaller the result, the more similar the strings are to each other. Throws an exception if constant `needle` or `haystack` arguments are more than 32Kb in size. If any of the non-constant `haystack` or `needle` arguments is more than 32Kb in size, the distance is always 1.
 
-``` text
-┌─id─┬─name────┬─days─┐
-│  1 │ January │   31 │
-└────┴─────────┴──────┘
-```
+Functions `ngramDistanceCaseInsensitive`, `ngramDistanceUTF8` and `ngramDistanceCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
 
-## notILike(haystack, pattern), haystack NOT ILIKE pattern operator
+**Syntax**
 
-The same thing as `ilike`, but negative.
+```sql
+ngramDistance(haystack, needle)
+```
 
-## ngramDistance(haystack, needle)
+## ngramSearch
+
+Like `ngramDistance` but calculates the non-symmetric difference between a `needle` string and a `haystack` string, i.e. the number of n-grams from the needle minus the common number of n-grams, normalized by the number of `needle` n-grams. Returns a Float32 between 0 and 1. The bigger the result, the more likely `needle` is in the `haystack`. This function is useful for fuzzy string search. Also see function `soundex`.
 
-Calculates the 4-gram distance between `haystack` and `needle`: counts the symmetric difference between two multisets of 4-grams and normalizes it by the sum of their cardinalities.
Returns float number from 0 to 1 – the closer to zero, the more strings are similar to each other. If the constant `needle` or `haystack` is more than 32Kb, throws an exception. If some of the non-constant `haystack` or `needle` strings are more than 32Kb, the distance is always one.
-
-For case-insensitive search or/and in UTF-8 format use functions `ngramDistanceCaseInsensitive, ngramDistanceUTF8, ngramDistanceCaseInsensitiveUTF8`.
-
-## ngramSearch(haystack, needle)
-
-Same as `ngramDistance` but calculates the non-symmetric difference between `needle` and `haystack` – the number of n-grams from needle minus the common number of n-grams normalized by the number of `needle` n-grams. The closer to one, the more likely `needle` is in the `haystack`. Can be useful for fuzzy string search.
-
-For case-insensitive search or/and in UTF-8 format use functions `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`.
+Functions `ngramSearchCaseInsensitive`, `ngramSearchUTF8` and `ngramSearchCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function.
 
 :::note
-For UTF-8 case we use 3-gram distance. All these are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables – collisions may occur. With UTF-8 case-insensitive format we do not use fair `tolower` function – we zero the 5-th bit (starting from zero) of each codepoint byte and first bit of zeroth byte if bytes more than one – this works for Latin and mostly for all Cyrillic letters.
+The UTF-8 variants use the 3-gram distance. These are not perfectly fair n-gram distances. We use 2-byte hashes to hash the n-grams and then calculate the (non-)symmetric difference between these hash tables; collisions may occur. In UTF-8 case-insensitive mode we do not use a fair `tolower` function; instead, we zero the 5th bit (starting from zero) of each codepoint byte and the first bit of the zeroth byte if there is more than one byte. This works for Latin and mostly for all Cyrillic letters.
 :::
 
+**Syntax**
+
+```sql
+ngramSearch(haystack, needle)
+```
+
 ## countSubstrings
 
-Returns the number of substring occurrences.
+Returns how often substring `needle` occurs in string `haystack`.
 
-For a case-insensitive search, use [countSubstringsCaseInsensitive](../../sql-reference/functions/string-search-functions.md#countSubstringsCaseInsensitive) or [countSubstringsCaseInsensitiveUTF8](../../sql-reference/functions/string-search-functions.md#countSubstringsCaseInsensitiveUTF8) functions.
+Functions `countSubstringsCaseInsensitive` and `countSubstringsCaseInsensitiveUTF8` provide case-insensitive and case-insensitive UTF-8 variants of this function.
 
 **Syntax**
 
@@ -645,34 +505,18 @@ countSubstrings(haystack, needle[, start_pos])
 
 **Arguments**
 
-- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `start_pos` – Position of the first character in the string to start search. Optional. [UInt](../../sql-reference/data-types/int-uint.md).
+- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `needle` — Substring to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `start_pos` – Position (1-based) in `haystack` at which the search starts. [UInt](../../sql-reference/data-types/int-uint.md). Optional.
**Returned values** -- Number of occurrences. +- The number of occurrences. Type: [UInt64](../../sql-reference/data-types/int-uint.md). **Examples** -Query: - -``` sql -SELECT countSubstrings('foobar.com', '.'); -``` - -Result: - -``` text -┌─countSubstrings('foobar.com', '.')─┐ -│ 1 │ -└────────────────────────────────────┘ -``` - -Query: - ``` sql SELECT countSubstrings('aaaa', 'aa'); ``` @@ -685,7 +529,7 @@ Result: └───────────────────────────────┘ ``` -Query: +Example with `start_pos` argument: ```sql SELECT countSubstrings('abc___abc', 'abc', 4); @@ -699,125 +543,7 @@ Result: └────────────────────────────────────────┘ ``` -## countSubstringsCaseInsensitive - -Returns the number of substring occurrences case-insensitive. - -**Syntax** - -``` sql -countSubstringsCaseInsensitive(haystack, needle[, start_pos]) -``` - -**Arguments** - -- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` — Position of the first character in the string to start search. Optional. [UInt](../../sql-reference/data-types/int-uint.md). - -**Returned values** - -- Number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). - -**Examples** - -Query: - -``` sql -SELECT countSubstringsCaseInsensitive('aba', 'B'); -``` - -Result: - -``` text -┌─countSubstringsCaseInsensitive('aba', 'B')─┐ -│ 1 │ -└────────────────────────────────────────────┘ -``` - -Query: - -``` sql -SELECT countSubstringsCaseInsensitive('foobar.com', 'CoM'); -``` - -Result: - -``` text -┌─countSubstringsCaseInsensitive('foobar.com', 'CoM')─┐ -│ 1 │ -└─────────────────────────────────────────────────────┘ -``` - -Query: - -``` sql -SELECT countSubstringsCaseInsensitive('abC___abC', 'aBc', 2); -``` - -Result: - -``` text -┌─countSubstringsCaseInsensitive('abC___abC', 'aBc', 2)─┐ -│ 1 │ -└───────────────────────────────────────────────────────┘ -``` - -## countSubstringsCaseInsensitiveUTF8 - -Returns the number of substring occurrences in `UTF-8` case-insensitive. - -**Syntax** - -``` sql -SELECT countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos]) -``` - -**Arguments** - -- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `start_pos` — Position of the first character in the string to start search. Optional. [UInt](../../sql-reference/data-types/int-uint.md). - -**Returned values** - -- Number of occurrences. - -Type: [UInt64](../../sql-reference/data-types/int-uint.md). - -**Examples** - -Query: - -``` sql -SELECT countSubstringsCaseInsensitiveUTF8('абв', 'A'); -``` - -Result: - -``` text -┌─countSubstringsCaseInsensitiveUTF8('абв', 'A')─┐ -│ 1 │ -└────────────────────────────────────────────────┘ -``` - -Query: - -```sql -SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв'); -``` - -Result: - -``` text -┌─countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв')─┐ -│ 3 │ -└────────────────────────────────────────────────────────────┘ -``` - -## countMatches(haystack, pattern) +## countMatches Returns the number of regular expression matches for a `pattern` in a `haystack`. @@ -829,19 +555,17 @@ countMatches(haystack, pattern) **Arguments** -- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal). 
-- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md).
+- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `pattern` — The regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). [String](../../sql-reference/data-types/string.md).
 
 **Returned value**
 
-- The number of matches.
+- The number of matches.
 
 Type: [UInt64](../../sql-reference/data-types/int-uint.md).
 
 **Examples**
 
-Query:
-
 ``` sql
 SELECT countMatches('foobar.com', 'o+');
 ```
@@ -854,8 +578,6 @@ Result:
 └──────────────────────────────────┘
 ```
 
-Query:
-
 ``` sql
 SELECT countMatches('aaaa', 'aa');
 ```
@@ -868,7 +590,7 @@ Result:
 └───────────────────────────────┘
 ```
 
-## regexpExtract(haystack, pattern[, index])
+## regexpExtract
 
 Extracts the first string in haystack that matches the regexp pattern and corresponds to the regex group index.
 
@@ -882,9 +604,9 @@ Alias: `REGEXP_EXTRACT(haystack, pattern[, index])`.
 
 **Arguments**
 
-- `haystack` — String, in which regexp pattern will to be matched. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `pattern` — String, regexp expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal).
-- `index` – An integer number greater or equal 0 with default 1. It represents which regex group to extract. [UInt or Int](../../sql-reference/data-types/int-uint.md). Optional.
+- `haystack` — String in which the regexp pattern is matched. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `pattern` — String, regexp expression, must be constant. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `index` – An integer number greater than or equal to 0, with default 1. It represents which regex group to extract. [UInt or Int](../../sql-reference/data-types/int-uint.md). Optional.
**Returned values** @@ -899,8 +621,12 @@ SELECT regexpExtract('100-200', '(\\d+)-(\\d+)', 1), regexpExtract('100-200', '(\\d+)-(\\d+)', 2), regexpExtract('100-200', '(\\d+)-(\\d+)', 0), - regexpExtract('100-200', '(\\d+)-(\\d+)') + regexpExtract('100-200', '(\\d+)-(\\d+)'); +``` +Result: + +``` text ┌─regexpExtract('100-200', '(\\d+)-(\\d+)', 1)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)', 2)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)', 0)─┬─regexpExtract('100-200', '(\\d+)-(\\d+)')─┐ │ 100 │ 200 │ 100-200 │ 100 │ └──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘ diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index c0eed01cccd..d8f23c92e61 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/time-window-functions -sidebar_position: 68 +sidebar_position: 175 sidebar_label: Time Window --- @@ -118,4 +118,4 @@ hopEnd(time_attr, hop_interval, window_interval [, timezone]); ## Related content -- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) \ No newline at end of file +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index c4742d0bac7..d7594e67443 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -1,8 +1,7 @@ --- slug: /en/sql-reference/functions/tuple-functions -sidebar_position: 66 +sidebar_position: 180 sidebar_label: Tuples -title: "Functions for Working with Tuples" --- ## tuple @@ -47,11 +46,11 @@ You can use the `EXCEPT` expression to skip columns as a result of the query. **Arguments** -- `x` — A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md). +- `x` — A `tuple` function, column, or tuple of elements. [Tuple](../../sql-reference/data-types/tuple.md). **Returned value** -- None. +- None. **Examples** @@ -111,7 +110,7 @@ Result: **See Also** -- [Tuple](../../sql-reference/data-types/tuple.md) +- [Tuple](../../sql-reference/data-types/tuple.md) ## tupleHammingDistance @@ -125,14 +124,14 @@ tupleHammingDistance(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). Tuples should have the same type of the elements. **Returned value** -- The Hamming distance. +- The Hamming distance. Type: The result type is calculed the same way it is for [Arithmetic functions](../../sql-reference/functions/arithmetic-functions.md), based on the number of elements in the input tuples. @@ -195,11 +194,11 @@ tupleToNameValuePairs(tuple) **Arguments** -- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values. +- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values. 
**Returned value** -- An array with (name, value) pairs. +- An array with (name, value) pairs. Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)). @@ -272,12 +271,12 @@ Alias: `vectorSum`. **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). **Returned value** -- Tuple with the sum. +- Tuple with the sum. Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -311,12 +310,12 @@ Alias: `vectorDifference`. **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). **Returned value** -- Tuple with the result of subtraction. +- Tuple with the result of subtraction. Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -348,12 +347,12 @@ tupleMultiply(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). **Returned value** -- Tuple with the multiplication. +- Tuple with the multiplication. Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -385,12 +384,12 @@ tupleDivide(tuple1, tuple2) **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). **Returned value** -- Tuple with the result of division. +- Tuple with the result of division. Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -422,11 +421,11 @@ tupleNegate(tuple) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). **Returned value** -- Tuple with the result of negation. +- Tuple with the result of negation. Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -458,12 +457,12 @@ tupleMultiplyByNumber(tuple, number) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `number` — Multiplier. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `number` — Multiplier. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). **Returned value** -- Tuple with multiplied values. +- Tuple with multiplied values. Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -495,12 +494,12 @@ tupleDivideByNumber(tuple, number) **Arguments** -- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). -- `number` — Divider. 
[Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `tuple` — [Tuple](../../sql-reference/data-types/tuple.md). +- `number` — Divider. [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). **Returned value** -- Tuple with divided values. +- Tuple with divided values. Type: [Tuple](../../sql-reference/data-types/tuple.md). @@ -534,12 +533,12 @@ Alias: `scalarProduct`. **Arguments** -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). +- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). **Returned value** -- Scalar product. +- Scalar product. Type: [Int/UInt](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index cdecbbcc2e9..786ea47f12c 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -1,8 +1,7 @@ --- slug: /en/sql-reference/functions/tuple-map-functions -sidebar_position: 46 +sidebar_position: 120 sidebar_label: Maps -title: "Functions for Maps" --- ## map @@ -17,12 +16,12 @@ map(key1, value1[, key2, value2, ...]) **Arguments** -- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md). -- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md). +- `key` — The key part of the pair. [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md). +- `value` — The value part of the pair. Arbitrary type, including [Map](../../sql-reference/data-types/map.md) and [Array](../../sql-reference/data-types/array.md). **Returned value** -- Data structure as `key:value` pairs. +- Data structure as `key:value` pairs. Type: [Map(key, value)](../../sql-reference/data-types/map.md). 
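+
+For example, a map can be constructed from an interleaved list of keys and values and then indexed with the `[]` operator (a minimal illustration):
+
+``` sql
+SELECT map('key1', 1, 'key2', 2) AS m, m['key2'] AS v;
+```
+
+``` text
+┌─m───────────────────┬─v─┐
+│ {'key1':1,'key2':2} │ 2 │
+└─────────────────────┴───┘
+```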
@@ -64,30 +63,33 @@ Result: **See Also** -- [Map(key, value)](../../sql-reference/data-types/map.md) data type +- [Map(key, value)](../../sql-reference/data-types/map.md) data type ## mapFromArrays Merges an [Array](../../sql-reference/data-types/array.md) of keys and an [Array](../../sql-reference/data-types/array.md) of values into a [Map(key, value)](../../sql-reference/data-types/map.md). Notice that the second argument could also be a [Map](../../sql-reference/data-types/map.md), thus it is casted to an Array when executing. + The function is a more convenient alternative to `CAST((key_array, value_array_or_map), 'Map(key_type, value_type)')`. For example, instead of writing `CAST((['aa', 'bb'], [4, 5]), 'Map(String, UInt32)')`, you can write `mapFromArrays(['aa', 'bb'], [4, 5])`. - + + **Syntax** ```sql mapFromArrays(keys, values) -``` +``` Alias: `MAP_FROM_ARRAYS(keys, values)` - + **Arguments** -- `keys` — Given key array to create a map from. The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md) -- `values` - Given value array or map to create a map from. - + +- `keys` — Given key array to create a map from. The nested type of array must be: [String](../../sql-reference/data-types/string.md), [Integer](../../sql-reference/data-types/int-uint.md), [LowCardinality](../../sql-reference/data-types/lowcardinality.md), [FixedString](../../sql-reference/data-types/fixedstring.md), [UUID](../../sql-reference/data-types/uuid.md), [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md), [Date32](../../sql-reference/data-types/date32.md), [Enum](../../sql-reference/data-types/enum.md) +- `values` - Given value array or map to create a map from. + **Returned value** - A map whose keys and values are constructed from the key array and value array/map. - + **Example** Query: @@ -95,6 +97,7 @@ Query: ```sql select mapFromArrays(['a', 'b', 'c'], [1, 2, 3]) + ┌─mapFromArrays(['a', 'b', 'c'], [1, 2, 3])─┐ │ {'a':1,'b':2,'c':3} │ └───────────────────────────────────────────┘ @@ -122,7 +125,7 @@ Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sq **Returned value** -- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. +- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. **Example** @@ -170,7 +173,7 @@ Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sq **Returned value** -- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. 
+- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. **Example** @@ -223,17 +226,17 @@ Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../. Mapped arrays: -- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). -- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). -- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges). +- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). +- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)). +- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges). or -- `map` — Map with integer keys. [Map](../../sql-reference/data-types/map.md). +- `map` — Map with integer keys. [Map](../../sql-reference/data-types/map.md). **Returned value** -- Depending on the arguments returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys. +- Depending on the arguments returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys. **Example** @@ -277,12 +280,12 @@ mapContains(map, key) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). -- `key` — Key. Type matches the type of keys of `map` parameter. +- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `key` — Key. Type matches the type of keys of `map` parameter. **Returned value** -- `1` if `map` contains `key`, `0` if not. +- `1` if `map` contains `key`, `0` if not. Type: [UInt8](../../sql-reference/data-types/int-uint.md). @@ -322,11 +325,11 @@ mapKeys(map) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../../sql-reference/data-types/map.md). **Returned value** -- Array containing all keys from the `map`. +- Array containing all keys from the `map`. Type: [Array](../../sql-reference/data-types/array.md). @@ -365,11 +368,11 @@ mapValues(map) **Arguments** -- `map` — Map. [Map](../../sql-reference/data-types/map.md). +- `map` — Map. [Map](../../sql-reference/data-types/map.md). **Returned value** -- Array containing all the values from `map`. +- Array containing all the values from `map`. Type: [Array](../../sql-reference/data-types/array.md). @@ -392,25 +395,24 @@ Result: │ ['eleven','11'] │ │ ['twelve','6.0'] │ └──────────────────┘ -``` - -## mapContainsKeyLike - +``` + +## mapContainsKeyLike + **Syntax** ```sql mapContainsKeyLike(map, pattern) -``` - +``` + **Arguments** - -- `map` — Map. [Map](../../sql-reference/data-types/map.md). -- `pattern` - String pattern to match. - +- `map` — Map. 
[Map](../../sql-reference/data-types/map.md).
+- `pattern` - String pattern to match.
+
 **Returned value**
 
-- `1` if `map` contains `key` like specified pattern, `0` if not.
-
+- `1` if `map` contains a key matching the specified pattern, `0` if not.
+
 **Example**
 
 Query:
 
 ```sql
 CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
 
 INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
 
 SELECT mapContainsKeyLike(a, 'a%') FROM test;
-```
-
-Result:
-
+```
+
+Result:
+
 ```text
 ┌─mapContainsKeyLike(a, 'a%')─┐
 │                           1 │
 │                           0 │
-└─────────────────────────────┘
-```
-
-## mapExtractKeyLike
-
+└─────────────────────────────┘
+```
+
+## mapExtractKeyLike
+
 **Syntax**
 
 ```sql
 mapExtractKeyLike(map, pattern)
-```
-
+```
+
 **Arguments**
-
-- `map` — Map. [Map](../../sql-reference/data-types/map.md).
-- `pattern` - String pattern to match.
-
+
+- `map` — Map. [Map](../../sql-reference/data-types/map.md).
+- `pattern` - String pattern to match.
+
 **Returned value**
 
-- A map contained elements the key of which matchs the specified pattern. If there are no elements matched the pattern, it will return an empty map.
+- A map containing the elements whose keys match the specified pattern. If no elements match the pattern, an empty map is returned.
 
 **Example**
 
 Query:
 
 ```sql
 CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
 
 INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
 
 SELECT mapExtractKeyLike(a, 'a%') FROM test;
-```
-
-Result:
-
+```
+
+Result:
+
 ```text
 ┌─mapExtractKeyLike(a, 'a%')─┐
 │ {'abc':'abc'}              │
 │ {}                         │
 └────────────────────────────┘
-```
-
-## mapApply
-
+```
+
+## mapApply
+
 **Syntax**
 
 ```sql
 mapApply(func, map)
-```
-
+```
+
 **Arguments**
-
-- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
-- `map` — [Map](../../sql-reference/data-types/map.md).
+
+- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
+- `map` — [Map](../../sql-reference/data-types/map.md).
 
 **Returned value**
 
 - Returns a map obtained from the original map by application of `func(map1[i], …, mapN[i])` for each element.
-
+
 **Example**
 
 Query:
 
 ```sql
 SELECT mapApply((k, v) -> (k, v * 10), _map) AS r
@@ -498,36 +500,36 @@ FROM
 (
     SELECT map('key1', number, 'key2', number * 2) AS _map
     FROM numbers(3)
 )
-```
-
-Result:
-
+```
+
+Result:
+
 ```text
 ┌─r─────────────────────┐
 │ {'key1':0,'key2':0}   │
 │ {'key1':10,'key2':20} │
 │ {'key1':20,'key2':40} │
 └───────────────────────┘
-```
+```
+
+## mapFilter
 
-## mapFilter
-
 **Syntax**
 
 ```sql
 mapFilter(func, map)
-```
-
+```
+
 **Arguments**
 
-- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
-- `map` — [Map](../../sql-reference/data-types/map.md).
+- `func` - [Lambda function](../../sql-reference/functions/index.md#higher-order-functions---operator-and-lambdaparams-expr-function).
+- `map` — [Map](../../sql-reference/data-types/map.md).
 
 **Returned value**
 
 - Returns a map containing only the elements in `map` for which `func(map1[i], …, mapN[i])` returns something other than 0.
-
-
+
+
 **Example**
 
 Query:
 
 ```sql
 SELECT mapFilter((k, v) -> ((v % 2) = 0), _map) AS r
@@ -539,48 +541,195 @@ FROM
 (
     SELECT map('key1', number, 'key2', number * 2) AS _map
     FROM numbers(3)
 )
-```
-
-Result:
-
+```
+
+Result:
+
 ```text
 ┌─r───────────────────┐
 │ {'key1':0,'key2':0} │
 │ {'key2':2}          │
 │ {'key1':2,'key2':4} │
 └─────────────────────┘
-```
+```
 
-## mapUpdate
-
+## mapUpdate
+
 **Syntax**
 
 ```sql
 mapUpdate(map1, map2)
-```
-
+```
+
 **Arguments**
 
-- `map1` [Map](../../sql-reference/data-types/map.md).
-- `map2` [Map](../../sql-reference/data-types/map.md).
+- `map1` — [Map](../../sql-reference/data-types/map.md).
+- `map2` — [Map](../../sql-reference/data-types/map.md).
 
 **Returned value**
 
-- Returns a map1 with values updated of values for the corresponding keys in map2.
+- Returns `map1` with values updated by the values for the corresponding keys in `map2`.
 
 **Example**
 
 Query:
 
 ```sql
 SELECT mapUpdate(map('key1', 0, 'key3', 0), map('key1', 10, 'key2', 10)) AS map;
-```
-
-Result:
-
+```
+
+Result:
+
 ```text
 ┌─map────────────────────────────┐
 │ {'key3':0,'key1':10,'key2':10} │
 └────────────────────────────────┘
-```
+```
+
+## mapConcat
+
+**Syntax**
+
+```sql
+mapConcat(maps)
+```
+
+**Arguments**
+
+- `maps` – Arbitrary number of arguments of [Map](../../sql-reference/data-types/map.md) type.
+
+**Returned value**
+
+- Returns a map created by concatenating the maps passed as arguments. If two or more maps have the same key, all of them are added to the result map, but only the first one is accessible via operator `[]`.
+
+**Examples**
+
+Query:
+
+```sql
+SELECT mapConcat(map('key1', 1, 'key3', 3), map('key2', 2)) AS map;
+```
+
+Result:
+
+```text
+┌─map──────────────────────────┐
+│ {'key1':1,'key3':3,'key2':2} │
+└──────────────────────────────┘
+```
+
+Query:
+
+```sql
+SELECT mapConcat(map('key1', 1, 'key2', 2), map('key1', 3)) AS map, map['key1'];
+```
+
+Result:
+
+```text
+┌─map──────────────────────────┬─elem─┐
+│ {'key1':1,'key2':2,'key1':3} │    1 │
+└──────────────────────────────┴──────┘
+```
+
+## mapExists(\[func,\] map)
+
+Returns 1 if there is at least one key-value pair in `map` for which `func(key, value)` returns something other than 0. Otherwise, it returns 0.
+
+Note that `mapExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
+
+**Example**
+
+Query:
+
+```sql
+SELECT mapExists((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res
+```
+
+Result:
+
+```text
+┌─res─┐
+│   1 │
+└─────┘
+```
+
+## mapAll(\[func,\] map)
+
+Returns 1 if `func(key, value)` returns something other than 0 for all key-value pairs in `map`. Otherwise, it returns 0.
+
+Note that `mapAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument.
+
+**Example**
+
+Query:
+
+```sql
+SELECT mapAll((k, v) -> (v = 1), map('k1', 1, 'k2', 2)) AS res
+```
+
+Result:
+
+```text
+┌─res─┐
+│   0 │
+└─────┘
+```
+
+## mapSort(\[func,\] map)
+
+Sorts the elements of the `map` in ascending order. If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map.
+
+**Examples**
+
+``` sql
+SELECT mapSort(map('key2', 2, 'key3', 1, 'key1', 3)) AS map;
+```
+
+``` text
+┌─map──────────────────────────┐
+│ {'key1':3,'key2':2,'key3':1} │
+└──────────────────────────────┘
+```
+
+``` sql
+SELECT mapSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map;
+```
+
+``` text
+┌─map──────────────────────────┐
+│ {'key3':1,'key2':2,'key1':3} │
+└──────────────────────────────┘
+```
+
+For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-sort) for `arraySort` function.
+
+## mapReverseSort(\[func,\] map)
+
+Sorts the elements of the `map` in descending order. If the `func` function is specified, the sorting order is determined by the result of the `func` function applied to the keys and values of the map.
+ + +**Examples** + +``` sql +SELECT mapReverseSort(map('key2', 2, 'key3', 1, 'key1', 3)) AS map; +``` + +``` text +┌─map──────────────────────────┐ +│ {'key3':1,'key2':2,'key1':3} │ +└──────────────────────────────┘ +``` + +``` sql +SELECT mapReverseSort((k, v) -> v, map('key2', 2, 'key3', 1, 'key1', 3)) AS map; +``` + +``` text +┌─map──────────────────────────┐ +│ {'key1':3,'key2':2,'key3':1} │ +└──────────────────────────────┘ +``` + +For more details see the [reference](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort) for `arrayReverseSort` function. diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 213ed187f15..c7c66cc771f 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/type-conversion-functions -sidebar_position: 38 +sidebar_position: 185 sidebar_label: Type Conversion --- @@ -8,12 +8,14 @@ sidebar_label: Type Conversion ## Common Issues with Data Conversion -Be aware of potential data loss if values of a datatype are converted to a smaller datatype (for example from `Int64` to `Int32`) or between -incompatible datatypes (for example from `String` to `Int`). Make sure to check carefully if the result is as expected. - ClickHouse generally uses the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion). -`to` functions and [cast](#castx-t) have different behaviour in some cases, for example in case of [LowCardinality](../data-types/lowcardinality.md): [cast](#castx-t) removes [LowCardinality](../data-types/lowcardinality.md) trait `to` functions don't. The same with [Nullable](../data-types/nullable.md), this behaviour is not compatible with SQL standard, and it can be changed using [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting. +`to` functions and [cast](#castx-t) behave differently in some cases, for example in case of [LowCardinality](../data-types/lowcardinality.md): [cast](#castx-t) removes the [LowCardinality](../data-types/lowcardinality.md) trait while `to` functions don't. The same applies to [Nullable](../data-types/nullable.md); this behaviour is not compatible with the SQL standard, and it can be changed using the [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting. + +:::note +Be aware of potential data loss if values of a datatype are converted to a smaller datatype (for example from `Int64` to `Int32`) or between +incompatible datatypes (for example from `String` to `Int`). Make sure to check carefully if the result is as expected. +::: Example: @@ -51,16 +53,16 @@ SETTINGS cast_keep_nullable = 1 Converts an input value to a value of the [Int](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: -- `toInt8(expr)` — Converts to a value of data type `Int8`. -- `toInt16(expr)` — Converts to a value of data type `Int16`. -- `toInt32(expr)` — Converts to a value of data type `Int32`. -- `toInt64(expr)` — Converts to a value of data type `Int64`. -- `toInt128(expr)` — Converts to a value of data type `Int128`. -- `toInt256(expr)` — Converts to a value of data type `Int256`. +- `toInt8(expr)` — Converts to a value of data type `Int8`. +- `toInt16(expr)` — Converts to a value of data type `Int16`. +- `toInt32(expr)` — Converts to a value of data type `Int32`. 
+- `toInt64(expr)` — Converts to a value of data type `Int64`. +- `toInt128(expr)` — Converts to a value of data type `Int128`. +- `toInt256(expr)` — Converts to a value of data type `Int256`. **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** @@ -151,19 +153,19 @@ Result: Converts an input value to the [UInt](/docs/en/sql-reference/data-types/int-uint.md) data type. This function family includes: -- `toUInt8(expr)` — Converts to a value of data type `UInt8`. -- `toUInt16(expr)` — Converts to a value of data type `UInt16`. -- `toUInt32(expr)` — Converts to a value of data type `UInt32`. -- `toUInt64(expr)` — Converts to a value of data type `UInt64`. -- `toUInt256(expr)` — Converts to a value of data type `UInt256`. +- `toUInt8(expr)` — Converts to a value of data type `UInt8`. +- `toUInt16(expr)` — Converts to a value of data type `UInt16`. +- `toUInt32(expr)` — Converts to a value of data type `UInt32`. +- `toUInt64(expr)` — Converts to a value of data type `UInt64`. +- `toUInt256(expr)` — Converts to a value of data type `UInt256`. **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) returning a number or a string with the decimal representation of a number. Binary, octal, and hexadecimal representations of numbers are not supported. Leading zeroes are stripped. **Returned value** -- Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type. +- Integer value in the `UInt8`, `UInt16`, `UInt32`, `UInt64` or `UInt256` data type. Functions use [rounding towards zero](https://en.wikipedia.org/wiki/Rounding#Rounding_towards_zero), meaning they truncate fractional digits of numbers. @@ -338,11 +340,11 @@ toDate32(expr) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md) or [Date](/docs/en/sql-reference/data-types/date.md). +- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md) or [Date](/docs/en/sql-reference/data-types/date.md). **Returned value** -- A calendar date. Type [Date32](/docs/en/sql-reference/data-types/date32.md). +- A calendar date. Type [Date32](/docs/en/sql-reference/data-types/date32.md). **Example** @@ -456,13 +458,13 @@ toDateTime64(expr, scale, [timezone]) **Arguments** -- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). -- `scale` - Tick size (precision): 10-precision seconds. Valid range: [ 0 : 9 ]. -- `timezone` - Time zone of the specified datetime64 object. +- `expr` — The value. 
[String](/docs/en/sql-reference/data-types/string.md), [UInt32](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md). +- `scale` - Tick size (precision): 10^(-precision) seconds. Valid range: [ 0 : 9 ]. +- `timezone` - Time zone of the specified datetime64 object. **Returned value** -- A calendar date and time of day, with sub-second precision. +- A calendar date and time of day, with sub-second precision. Type: [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). @@ -522,33 +524,33 @@ SELECT toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') AS value, toTypeN Converts `value` to the [Decimal](/docs/en/sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places. -- `toDecimal32(value, S)` -- `toDecimal64(value, S)` -- `toDecimal128(value, S)` -- `toDecimal256(value, S)` +- `toDecimal32(value, S)` +- `toDecimal64(value, S)` +- `toDecimal128(value, S)` +- `toDecimal256(value, S)` ## toDecimal(32\|64\|128\|256)OrNull Converts an input string to a [Nullable(Decimal(P,S))](/docs/en/sql-reference/data-types/decimal.md) data type value. This family of functions includes: -- `toDecimal32OrNull(expr, S)` — Results in `Nullable(Decimal32(S))` data type. -- `toDecimal64OrNull(expr, S)` — Results in `Nullable(Decimal64(S))` data type. -- `toDecimal128OrNull(expr, S)` — Results in `Nullable(Decimal128(S))` data type. -- `toDecimal256OrNull(expr, S)` — Results in `Nullable(Decimal256(S))` data type. +- `toDecimal32OrNull(expr, S)` — Results in `Nullable(Decimal32(S))` data type. +- `toDecimal64OrNull(expr, S)` — Results in `Nullable(Decimal64(S))` data type. +- `toDecimal128OrNull(expr, S)` — Results in `Nullable(Decimal128(S))` data type. +- `toDecimal256OrNull(expr, S)` — Results in `Nullable(Decimal256(S))` data type. These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `NULL` value instead of an exception in the event of an input value parsing error. **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. -- `S` — Scale, the number of decimal places in the resulting value. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `S` — Scale, the number of decimal places in the resulting value. **Returned value** A value in the `Nullable(Decimal(P,S))` data type. The value contains: -- Number with `S` decimal places, if ClickHouse interprets the input string as a number. -- `NULL`, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. +- Number with `S` decimal places, if ClickHouse interprets the input string as a number. +- `NULL`, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. **Examples** @@ -585,24 +587,24 @@ Result: Converts an input string to a [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type value. 
This family of functions includes: -- `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type. -- `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type. -- `toDecimal128OrDefault(expr, S)` — Results in `Decimal128(S)` data type. -- `toDecimal256OrDefault(expr, S)` — Results in `Decimal256(S)` data type. +- `toDecimal32OrDefault(expr, S)` — Results in `Decimal32(S)` data type. +- `toDecimal64OrDefault(expr, S)` — Results in `Decimal64(S)` data type. +- `toDecimal128OrDefault(expr, S)` — Results in `Decimal128(S)` data type. +- `toDecimal256OrDefault(expr, S)` — Results in `Decimal256(S)` data type. These functions should be used instead of `toDecimal*()` functions, if you prefer to get a default value instead of an exception in the event of an input value parsing error. **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. -- `S` — Scale, the number of decimal places in the resulting value. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `S` — Scale, the number of decimal places in the resulting value. **Returned value** A value in the `Decimal(P,S)` data type. The value contains: -- Number with `S` decimal places, if ClickHouse interprets the input string as a number. -- Default `Decimal(P,S)` data type value, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. +- Number with `S` decimal places, if ClickHouse interprets the input string as a number. +- Default `Decimal(P,S)` data type value, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. **Examples** @@ -638,24 +640,24 @@ Result: Converts an input value to the [Decimal(P,S)](/docs/en/sql-reference/data-types/decimal.md) data type. This family of functions includes: -- `toDecimal32OrZero( expr, S)` — Results in `Decimal32(S)` data type. -- `toDecimal64OrZero( expr, S)` — Results in `Decimal64(S)` data type. -- `toDecimal128OrZero( expr, S)` — Results in `Decimal128(S)` data type. -- `toDecimal256OrZero( expr, S)` — Results in `Decimal256(S)` data type. +- `toDecimal32OrZero( expr, S)` — Results in `Decimal32(S)` data type. +- `toDecimal64OrZero( expr, S)` — Results in `Decimal64(S)` data type. +- `toDecimal128OrZero( expr, S)` — Results in `Decimal128(S)` data type. +- `toDecimal256OrZero( expr, S)` — Results in `Decimal256(S)` data type. These functions should be used instead of `toDecimal*()` functions, if you prefer to get a `0` value instead of an exception in the event of an input value parsing error. **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. -- `S` — Scale, the number of decimal places in the resulting value. +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions), returns a value in the [String](/docs/en/sql-reference/data-types/string.md) data type. 
ClickHouse expects the textual representation of the decimal number. For example, `'1.111'`. +- `S` — Scale, the number of decimal places in the resulting value. **Returned value** A value in the `Decimal(P,S)` data type. The value contains: -- Number with `S` decimal places, if ClickHouse interprets the input string as a number. -- 0 with `S` decimal places, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. +- Number with `S` decimal places, if ClickHouse interprets the input string as a number. +- 0 with `S` decimal places, if ClickHouse can’t interpret the input string as a number or if the input number contains more than `S` decimal places. **Example** @@ -783,14 +785,14 @@ toDecimalString(number, scale) **Parameters** -- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md), -- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `number` — Value to be represented as String, [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md), [Float](/docs/en/sql-reference/data-types/float.md), [Decimal](/docs/en/sql-reference/data-types/decimal.md), +- `scale` — Number of fractional digits, [UInt8](/docs/en/sql-reference/data-types/int-uint.md). * Maximum scale for [Decimal](/docs/en/sql-reference/data-types/decimal.md) and [Int, UInt](/docs/en/sql-reference/data-types/int-uint.md) types is 77 (it is the maximum possible number of significant digits for Decimal), * Maximum scale for [Float](/docs/en/sql-reference/data-types/float.md) is 60. **Returned value** -- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale). +- Input value represented as [String](/docs/en/sql-reference/data-types/string.md) with given number of fractional digits (scale). The number is rounded up or down according to common arithmetic if the requested scale is smaller than the original number's scale. **Example** @@ -845,11 +847,11 @@ reinterpretAsUUID(fixed_string) **Arguments** -- `fixed_string` — Big-endian byte string. [FixedString](/docs/en/sql-reference/data-types/fixedstring.md/#fixedstring). +- `fixed_string` — Big-endian byte string. [FixedString](/docs/en/sql-reference/data-types/fixedstring.md/#fixedstring). **Returned value** -- The UUID type value. [UUID](/docs/en/sql-reference/data-types/uuid.md/#uuid-data-type). +- The UUID type value. [UUID](/docs/en/sql-reference/data-types/uuid.md/#uuid-data-type). **Examples** @@ -901,12 +903,12 @@ reinterpret(x, type) **Arguments** -- `x` — Any type. -- `type` — Destination type. [String](/docs/en/sql-reference/data-types/string.md). +- `x` — Any type. +- `type` — Destination type. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- Destination type value. +- Destination type value. **Examples** @@ -940,13 +942,13 @@ x::t **Arguments** -- `x` — A value to convert. May be of any type. -- `T` — The name of the target data type. [String](/docs/en/sql-reference/data-types/string.md). -- `t` — The target data type. +- `x` — A value to convert. May be of any type. +- `T` — The name of the target data type. [String](/docs/en/sql-reference/data-types/string.md). +- `t` — The target data type. **Returned value** -- Converted value. +- Converted value. 
:::note If the input value does not fit the bounds of the target type, the result overflows. For example, `CAST(-1, 'UInt8')` returns `255`. @@ -1028,7 +1030,7 @@ Result: **See also** -- [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting +- [cast_keep_nullable](../../operations/settings/settings.md/#cast_keep_nullable) setting ## accurateCast(x, T) @@ -1076,12 +1078,12 @@ accurateCastOrNull(x, T) **Parameters** -- `x` — Input value. -- `T` — The name of the returned data type. +- `x` — Input value. +- `T` — The name of the returned data type. **Returned value** -- The value, converted to the specified data type `T`. +- The value, converted to the specified data type `T`. **Example** @@ -1129,13 +1131,13 @@ accurateCastOrDefault(x, T) **Parameters** -- `x` — Input value. -- `T` — The name of the returned data type. -- `default_value` — Default value of returned data type. +- `x` — Input value. +- `T` — The name of the returned data type. +- `default_value` — Default value of returned data type. **Returned value** -- The value converted to the specified data type `T`. +- The value converted to the specified data type `T`. **Example** @@ -1192,11 +1194,11 @@ toIntervalYear(number) **Arguments** -- `number` — Duration of interval. Positive integer number. +- `number` — Duration of interval. Positive integer number. **Returned values** -- The value in `Interval` data type. +- The value in `Interval` data type. **Example** @@ -1234,9 +1236,9 @@ parseDateTime(str, format[, timezone]) **Arguments** -- `str` — the String to be parsed -- `format` — the format string -- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional. +- `str` — the String to be parsed +- `format` — the format string +- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional. **Returned value(s)** @@ -1245,7 +1247,6 @@ Returns DateTime values parsed from input string according to a MySQL style form **Supported format specifiers** All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except: -- %f: fractional second - %Q: Quarter (1-4) **Example** @@ -1284,9 +1285,9 @@ parseDateTimeInJodaSyntax(str, format[, timezone]) **Arguments** -- `str` — the String to be parsed -- `format` — the format string -- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional. +- `str` — the String to be parsed +- `format` — the format string +- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional. **Returned value(s)** @@ -1332,22 +1333,22 @@ parseDateTimeBestEffort(time_string [, time_zone]) **Arguments** -- `time_string` — String containing a date and time to convert. [String](/docs/en/sql-reference/data-types/string.md). -- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](/docs/en/sql-reference/data-types/string.md). +- `time_string` — String containing a date and time to convert. [String](/docs/en/sql-reference/data-types/string.md). +- `time_zone` — Time zone. The function parses `time_string` according to the time zone. [String](/docs/en/sql-reference/data-types/string.md). 
**Supported non-standard formats** -- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time). -- A string with a date and a time component: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc. -- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY` etc. -- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `YYYY-MM` are substituted as `2000-01`. -- A string that includes the date and time along with time zone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`. +- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time). +- A string with a date and a time component: `YYYYMMDDhhmmss`, `DD/MM/YYYY hh:mm:ss`, `DD-MM-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc. +- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `DD/MM/YYYY`, `DD-MM-YY` etc. +- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case `YYYY-MM` are substituted as `2000-01`. +- A string that includes the date and time along with time zone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`. For all of the formats with separator the function parses month names expressed by their full name or by the first three letters of a month name. Examples: `24/DEC/18`, `24-Dec-18`, `01-September-2018`. **Returned value** -- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. +- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. **Examples** @@ -1427,10 +1428,10 @@ Result: **See Also** -- [RFC 1123](https://tools.ietf.org/html/rfc1123) -- [toDate](#todate) -- [toDateTime](#todatetime) -- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/) +- [RFC 1123](https://tools.ietf.org/html/rfc1123) +- [toDate](#todate) +- [toDateTime](#todatetime) +- [ISO 8601 announcement by @xkcd](https://xkcd.com/1179/) ## parseDateTimeBestEffortUS @@ -1466,13 +1467,13 @@ parseDateTime64BestEffort(time_string [, precision [, time_zone]]) **Parameters** -- `time_string` — String containing a date or date with time to convert. [String](/docs/en/sql-reference/data-types/string.md). -- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). +- `time_string` — String containing a date or date with time to convert. [String](/docs/en/sql-reference/data-types/string.md). +- `precision` — Required precision. `3` — for milliseconds, `6` — for microseconds. Default — `3`. Optional. [UInt8](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. +- `time_string` converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. 
**Examples** @@ -1534,11 +1535,11 @@ toLowCardinality(expr) **Arguments** -- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) resulting in one of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). +- `expr` — [Expression](/docs/en/sql-reference/syntax.md/#syntax-expressions) resulting in one of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types). **Returned values** -- Result of `expr`. +- Result of `expr`. Type: `LowCardinality(expr_result_type)` @@ -1580,11 +1581,11 @@ toUnixTimestamp64Nano(value) **Arguments** -- `value` — DateTime64 value with any precision. +- `value` — DateTime64 value with any precision. **Returned value** -- `value` converted to the `Int64` data type. +- `value` converted to the `Int64` data type. **Examples** @@ -1636,12 +1637,12 @@ fromUnixTimestamp64Nano(value [, timezone]) **Arguments** -- `value` — `Int64` value with any precision. -- `timezone` — `String` (optional) timezone name of the result. +- `value` — `Int64` value with any precision. +- `timezone` — `String` (optional) timezone name of the result. **Returned value** -- `value` converted to the `DateTime64` data type. +- `value` converted to the `DateTime64` data type. **Example** @@ -1672,12 +1673,12 @@ formatRow(format, x, y, ...) **Arguments** -- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated). -- `x`,`y`, ... — Expressions. +- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated). +- `x`,`y`, ... — Expressions. **Returned value** -- A formatted string. (for text formats it's usually terminated with the new line character). +- A formatted string. (for text formats it's usually terminated with the new line character). **Example** @@ -1743,12 +1744,12 @@ formatRowNoNewline(format, x, y, ...) **Arguments** -- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated). -- `x`,`y`, ... — Expressions. +- `format` — Text format. For example, [CSV](/docs/en/interfaces/formats.md/#csv), [TSV](/docs/en/interfaces/formats.md/#tabseparated). +- `x`,`y`, ... — Expressions. **Returned value** -- A formatted string. +- A formatted string. **Example** @@ -1781,12 +1782,12 @@ snowflakeToDateTime(value [, time_zone]) **Parameters** -- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). +- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- Input value converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. +- Input value converted to the [DateTime](/docs/en/sql-reference/data-types/datetime.md) data type. **Example** @@ -1817,12 +1818,12 @@ snowflakeToDateTime64(value [, time_zone]) **Parameters** -- `value` — Snowflake ID. 
[Int64](/docs/en/sql-reference/data-types/int-uint.md). -- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). +- `value` — Snowflake ID. [Int64](/docs/en/sql-reference/data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](/docs/en/sql-reference/data-types/string.md). **Returned value** -- Input value converted to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type. +- Input value converted to the [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) data type. **Example** @@ -1853,11 +1854,11 @@ dateTimeToSnowflake(value) **Parameters** -- `value` — Date and time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). +- `value` — Date and time. [DateTime](/docs/en/sql-reference/data-types/datetime.md). **Returned value** -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. +- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. **Example** @@ -1887,11 +1888,11 @@ dateTime64ToSnowflake(value) **Parameters** -- `value` — Date and time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). +- `value` — Date and time. [DateTime64](/docs/en/sql-reference/data-types/datetime64.md). **Returned value** -- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. +- Input value converted to the [Int64](/docs/en/sql-reference/data-types/int-uint.md) data type as the first Snowflake ID at that time. **Example** diff --git a/docs/en/sql-reference/functions/ulid-functions.md b/docs/en/sql-reference/functions/ulid-functions.md index 3d32a851df2..eb69b1779ae 100644 --- a/docs/en/sql-reference/functions/ulid-functions.md +++ b/docs/en/sql-reference/functions/ulid-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/ulid-functions -sidebar_position: 54 +sidebar_position: 190 sidebar_label: ULID --- @@ -18,7 +18,7 @@ generateULID([x]) **Arguments** -- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. +- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself is used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. **Returned value** @@ -60,12 +60,12 @@ ULIDStringToDateTime(ulid[, timezone]) **Arguments** -- `ulid` — Input ULID. 
[String](/docs/en/sql-reference/data-types/string.md) or [FixedString(26)](/docs/en/sql-reference/data-types/fixedstring.md). -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). +- `ulid` — Input ULID. [String](/docs/en/sql-reference/data-types/string.md) or [FixedString(26)](/docs/en/sql-reference/data-types/fixedstring.md). +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). [String](../../sql-reference/data-types/string.md). **Returned value** -- Timestamp with milliseconds precision. +- Timestamp with milliseconds precision. Type: [DateTime64(3)](/docs/en/sql-reference/data-types/datetime64.md). @@ -83,4 +83,4 @@ SELECT ULIDStringToDateTime('01GNB2S2FGN2P93QPXDNB4EN2R') ## See Also -- [UUID](../../sql-reference/functions/uuid-functions.md) +- [UUID](../../sql-reference/functions/uuid-functions.md) diff --git a/docs/en/sql-reference/functions/uniqtheta-functions.md b/docs/en/sql-reference/functions/uniqtheta-functions.md index b2d3712abfc..abe58e48715 100644 --- a/docs/en/sql-reference/functions/uniqtheta-functions.md +++ b/docs/en/sql-reference/functions/uniqtheta-functions.md @@ -1,5 +1,7 @@ --- slug: /en/sql-reference/functions/uniqtheta-functions +sidebar_position: 210 +sidebar_label: uniqTheta --- # uniqTheta Functions @@ -21,7 +23,7 @@ uniqThetaUnion(uniqThetaSketch,uniqThetaSketch) **Arguments** -- `uniqThetaSketch` – uniqThetaSketch object. +- `uniqThetaSketch` – uniqThetaSketch object. **Example** @@ -47,7 +49,7 @@ uniqThetaIntersect(uniqThetaSketch,uniqThetaSketch) **Arguments** -- `uniqThetaSketch` – uniqThetaSketch object. +- `uniqThetaSketch` – uniqThetaSketch object. **Example** @@ -73,7 +75,7 @@ uniqThetaNot(uniqThetaSketch,uniqThetaSketch) **Arguments** -- `uniqThetaSketch` – uniqThetaSketch object. +- `uniqThetaSketch` – uniqThetaSketch object. **Example** @@ -91,4 +93,4 @@ from **See Also** -- [uniqThetaSketch](../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) +- [uniqThetaSketch](../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) diff --git a/docs/en/sql-reference/functions/url-functions.md b/docs/en/sql-reference/functions/url-functions.md index b515f6ad518..f6871c86c4f 100644 --- a/docs/en/sql-reference/functions/url-functions.md +++ b/docs/en/sql-reference/functions/url-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/url-functions -sidebar_position: 54 +sidebar_position: 200 sidebar_label: URLs --- @@ -28,7 +28,7 @@ domain(url) **Arguments** -- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). +- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). The URL can be specified with or without a scheme. Examples: @@ -48,8 +48,8 @@ clickhouse.com **Returned values** -- Host name. If ClickHouse can parse the input string as a URL. -- Empty string. If ClickHouse can’t parse the input string as a URL. +- Host name. If ClickHouse can parse the input string as a URL. +- Empty string. If ClickHouse can’t parse the input string as a URL. Type: `String`. @@ -79,7 +79,7 @@ topLevelDomain(url) **Arguments** -- `url` — URL. Type: [String](../../sql-reference/data-types/string.md). +- `url` — URL. 
Type: [String](../../sql-reference/data-types/string.md). The URL can be specified with or without a scheme. Examples: @@ -91,8 +91,8 @@ https://clickhouse.com/time/ **Returned values** -- Domain name. If ClickHouse can parse the input string as a URL. -- Empty string. If ClickHouse cannot parse the input string as a URL. +- Domain name. If ClickHouse can parse the input string as a URL. +- Empty string. If ClickHouse cannot parse the input string as a URL. Type: `String`. @@ -118,9 +118,9 @@ Returns the part of the domain that includes top-level subdomains up to the “f For example: -- `cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/') = 'clickhouse.com.tr'`. -- `cutToFirstSignificantSubdomain('www.tr') = 'tr'`. -- `cutToFirstSignificantSubdomain('tr') = ''`. +- `cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/') = 'clickhouse.com.tr'`. +- `cutToFirstSignificantSubdomain('www.tr') = 'tr'`. +- `cutToFirstSignificantSubdomain('tr') = ''`. ### cutToFirstSignificantSubdomainWithWWW Returns the part of the domain that includes top-level subdomains up to the “f For example: -- `cutToFirstSignificantSubdomain('https://news.clickhouse.com.tr/') = 'clickhouse.com.tr'`. -- `cutToFirstSignificantSubdomain('www.tr') = 'www.tr'`. -- `cutToFirstSignificantSubdomain('tr') = ''`. +- `cutToFirstSignificantSubdomainWithWWW('https://news.clickhouse.com.tr/') = 'clickhouse.com.tr'`. +- `cutToFirstSignificantSubdomainWithWWW('www.tr') = 'www.tr'`. +- `cutToFirstSignificantSubdomainWithWWW('tr') = ''`. ### cutToFirstSignificantSubdomainCustom @@ -157,12 +157,12 @@ cutToFirstSignificantSubdomain(URL, TLD) **Parameters** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). **Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain. +- Part of the domain that includes top-level subdomains up to the first significant subdomain. Type: [String](../../sql-reference/data-types/string.md). @@ -184,7 +184,7 @@ Result: **See Also** -- [firstSignificantSubdomain](#firstsignificantsubdomain). +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### cutToFirstSignificantSubdomainCustomWithWWW @@ -211,12 +211,12 @@ cutToFirstSignificantSubdomainCustomWithWWW(URL, TLD) **Parameters** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). **Returned value** -- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. +- Part of the domain that includes top-level subdomains up to the first significant subdomain without stripping `www`. Type: [String](../../sql-reference/data-types/string.md). @@ -238,7 +238,7 @@ Result: **See Also** -- [firstSignificantSubdomain](#firstsignificantsubdomain). +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### firstSignificantSubdomainCustom @@ -265,12 +265,12 @@ firstSignificantSubdomainCustom(URL, TLD) **Parameters** -- `URL` — URL. [String](../../sql-reference/data-types/string.md). -- `TLD` — Custom TLD list name. 
[String](../../sql-reference/data-types/string.md). +- `URL` — URL. [String](../../sql-reference/data-types/string.md). +- `TLD` — Custom TLD list name. [String](../../sql-reference/data-types/string.md). **Returned value** -- First significant subdomain. +- First significant subdomain. Type: [String](../../sql-reference/data-types/string.md). @@ -292,7 +292,7 @@ Result: **See Also** -- [firstSignificantSubdomain](#firstsignificantsubdomain). +- [firstSignificantSubdomain](#firstsignificantsubdomain). ### port(URL\[, default_port = 0\]) @@ -418,11 +418,11 @@ netloc(URL) **Arguments** -- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `url` — URL. [String](../../sql-reference/data-types/string.md). **Returned value** -- `username:password@host:port`. +- `username:password@host:port`. Type: `String`. @@ -474,12 +474,12 @@ cutURLParameter(URL, name) **Arguments** -- `url` — URL. [String](../../sql-reference/data-types/string.md). -- `name` — name of URL parameter. [String](../../sql-reference/data-types/string.md) or [Array](../../sql-reference/data-types/array.md) of Strings. +- `url` — URL. [String](../../sql-reference/data-types/string.md). +- `name` — name of URL parameter. [String](../../sql-reference/data-types/string.md) or [Array](../../sql-reference/data-types/array.md) of Strings. **Returned value** -- URL with `name` URL parameter removed. +- URL with `name` URL parameter removed. Type: `String`. diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index 474e3248d1f..c338add3a57 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -1,12 +1,10 @@ --- slug: /en/sql-reference/functions/uuid-functions -sidebar_position: 53 -sidebar_label: UUID +sidebar_position: 205 +sidebar_label: UUIDs --- -# Functions for Working with UUID - -The functions for working with UUID are listed below. +# Functions for Working with UUIDs ## generateUUIDv4 @@ -20,7 +18,7 @@ generateUUIDv4([x]) **Arguments** -- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself if used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. +- `x` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in any of the [supported data types](../../sql-reference/data-types/index.md#data_types). The resulting value is discarded, but the expression itself is used for bypassing [common subexpression elimination](../../sql-reference/functions/index.md#common-subexpression-elimination) if the function is called multiple times in one query. Optional parameter. **Returned value** @@ -69,11 +67,11 @@ The function also works for [arrays](array-functions.md#function-empty) or [stri **Arguments** -- `x` — Input UUID. [UUID](../data-types/uuid.md). +- `x` — Input UUID. [UUID](../data-types/uuid.md). **Returned value** -- Returns `1` for an empty UUID or `0` for a non-empty UUID. +- Returns `1` for an empty UUID or `0` for a non-empty UUID. Type: [UInt8](../data-types/int-uint.md). @@ -111,11 +109,11 @@ The function also works for [arrays](array-functions.md#function-notempty) or [s **Arguments** -- `x` — Input UUID. [UUID](../data-types/uuid.md). 
+- `x` — Input UUID. [UUID](../data-types/uuid.md). **Returned value** -- Returns `1` for a non-empty UUID or `0` for an empty UUID. +- Returns `1` for a non-empty UUID or `0` for an empty UUID. Type: [UInt8](../data-types/int-uint.md). @@ -165,8 +163,8 @@ SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid **Arguments** -- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#string). -- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](/docs/en/sql-reference/data-types/uuid.md). +- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#string). +- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](/docs/en/sql-reference/data-types/uuid.md). **Returned value** @@ -265,8 +263,8 @@ UUIDStringToNum(string[, variant = 1]) **Arguments** -- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#syntax-string-literal). -- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. +- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#syntax-string-literal). +- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. **Returned value** @@ -310,8 +308,8 @@ UUIDNumToString(binary[, variant = 1]) **Arguments** -- `binary` — [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as a binary representation of a UUID. -- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. +- `binary` — [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as a binary representation of a UUID. +- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. **Returned value** @@ -355,10 +353,10 @@ serverUUID() **Returned value** -- The UUID of the server. +- The UUID of the server. Type: [UUID](../data-types/uuid.md). ## See Also -- [dictGetUUID](../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions-other) +- [dictGetUUID](../../sql-reference/functions/ext-dict-functions.md#ext_dict_functions-other) diff --git a/docs/en/sql-reference/functions/ym-dict-functions.md b/docs/en/sql-reference/functions/ym-dict-functions.md index cf25b67a15a..a516f09d709 100644 --- a/docs/en/sql-reference/functions/ym-dict-functions.md +++ b/docs/en/sql-reference/functions/ym-dict-functions.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/functions/ym-dict-functions -sidebar_position: 59 +sidebar_position: 60 sidebar_label: Embedded Dictionaries --- @@ -118,13 +118,13 @@ regionToTopContinent(id[, geobase]) **Arguments** -- `id` — Region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint.md). -- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). Optional. +- `id` — Region ID from the geobase. [UInt32](../../sql-reference/data-types/int-uint.md). +- `geobase` — Dictionary key. See [Multiple Geobases](#multiple-geobases). 
[String](../../sql-reference/data-types/string.md). Optional. **Returned value** -- Identifier of the top level continent (the latter when you climb the hierarchy of regions). -- 0, if there is none. +- Identifier of the top level continent (the last one when you climb the hierarchy of regions). +- 0, if there is none. Type: `UInt32`. diff --git a/docs/en/sql-reference/operators/in.md b/docs/en/sql-reference/operators/in.md index 0599a50c0a4..8a8c86624d2 100644 --- a/docs/en/sql-reference/operators/in.md +++ b/docs/en/sql-reference/operators/in.md @@ -158,7 +158,7 @@ Now let’s examine a query with IN: SELECT uniq(UserID) FROM distributed_table WHERE CounterID = 101500 AND UserID IN (SELECT UserID FROM local_table WHERE CounterID = 34) ``` -- Calculation of the intersection of audiences of two sites. +- Calculation of the intersection of audiences of two sites. This query will be sent to all remote servers as diff --git a/docs/en/sql-reference/operators/index.md b/docs/en/sql-reference/operators/index.md index ade2d601f80..c8ed2627e2b 100644 --- a/docs/en/sql-reference/operators/index.md +++ b/docs/en/sql-reference/operators/index.md @@ -159,12 +159,12 @@ Extract parts from a given date. For example, you can retrieve a month from a gi The `part` parameter specifies which part of the date to retrieve. The following values are available: -- `DAY` — The day of the month. Possible values: 1–31. -- `MONTH` — The number of a month. Possible values: 1–12. -- `YEAR` — The year. -- `SECOND` — The second. Possible values: 0–59. -- `MINUTE` — The minute. Possible values: 0–59. -- `HOUR` — The hour. Possible values: 0–23. +- `DAY` — The day of the month. Possible values: 1–31. +- `MONTH` — The number of a month. Possible values: 1–12. +- `YEAR` — The year. +- `SECOND` — The second. Possible values: 0–59. +- `MINUTE` — The minute. Possible values: 0–59. +- `HOUR` — The hour. Possible values: 0–23. The `part` parameter is case-insensitive. @@ -285,8 +285,8 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul') AS time, time + 60 * 6 **See Also** -- [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type -- [toInterval](../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type conversion functions +- [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type +- [toInterval](../../sql-reference/functions/type-conversion-functions.md#function-tointerval) type conversion functions ## Logical AND Operator @@ -355,10 +355,10 @@ ClickHouse supports the `IS NULL` and `IS NOT NULL` operators. ### IS NULL -- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NULL` operator returns: - - `1`, if the value is `NULL`. - - `0` otherwise. -- For other values, the `IS NULL` operator always returns `0`. +- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NULL` operator returns: + - `1`, if the value is `NULL`. + - `0` otherwise. +- For other values, the `IS NULL` operator always returns `0`. Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn instead of reading and processing the whole column data. The query `SELECT n IS NULL FROM table` transforms to `SELECT n.null FROM TABLE`. 
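To make the subcolumn rewrite above concrete, here is a minimal sketch; the schema is an assumption for illustration and mirrors the `t_null` example used on this page:

```sql
-- Hypothetical schema mirroring the page's t_null example:
CREATE TABLE t_null (x Int8, y Nullable(Int8)) ENGINE = MergeTree ORDER BY x;
INSERT INTO t_null VALUES (1, NULL), (2, 3);

SET optimize_functions_to_subcolumns = 1;
-- The condition below is rewritten to read only the `y.null` subcolumn
-- instead of the whole `y` column:
SELECT x + 100 FROM t_null WHERE y IS NULL; -- returns 101
```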
@@ -376,10 +376,10 @@ SELECT x+100 FROM t_null WHERE y IS NULL ### IS NOT NULL -- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NOT NULL` operator returns: - - `0`, if the value is `NULL`. - - `1` otherwise. -- For other values, the `IS NOT NULL` operator always returns `1`. +- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NOT NULL` operator returns: + - `0`, if the value is `NULL`. + - `1` otherwise. +- For other values, the `IS NOT NULL` operator always returns `1`. diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 921785102a8..ff55f700023 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -18,14 +18,14 @@ Each action is an operation on a column. The following actions are supported: -- [ADD COLUMN](#add-column) — Adds a new column to the table. -- [DROP COLUMN](#drop-column) — Deletes the column. -- [RENAME COLUMN](#rename-column) — Renames an existing column. -- [CLEAR COLUMN](#clear-column) — Resets column values. -- [COMMENT COLUMN](#comment-column) — Adds a text comment to the column. -- [MODIFY COLUMN](#modify-column) — Changes column’s type, default expression and TTL. -- [MODIFY COLUMN REMOVE](#modify-column-remove) — Removes one of the column properties. -- [MATERIALIZE COLUMN](#materialize-column) — Materializes the column in the parts where the column is missing. +- [ADD COLUMN](#add-column) — Adds a new column to the table. +- [DROP COLUMN](#drop-column) — Deletes the column. +- [RENAME COLUMN](#rename-column) — Renames an existing column. +- [CLEAR COLUMN](#clear-column) — Resets column values. +- [COMMENT COLUMN](#comment-column) — Adds a text comment to the column. +- [MODIFY COLUMN](#modify-column) — Changes column’s type, default expression and TTL. +- [MODIFY COLUMN REMOVE](#modify-column-remove) — Removes one of the column properties. +- [MATERIALIZE COLUMN](#materialize-column) — Materializes the column in the parts where the column is missing. These actions are described in detail below. @@ -144,13 +144,13 @@ ALTER COLUMN [IF EXISTS] name TYPE [type] [default_expr] [codec] [TTL] [AFTER na This query changes the `name` column properties: -- Type -- Default expression -- Compression Codec -- TTL +- Type +- Default expression +- Compression Codec +- TTL For examples of modifying column compression CODECs, see [Column Compression Codecs](../create/table.md/#codecs). 
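As a sketch of the `MODIFY COLUMN` actions listed above, several properties can be changed in one statement; the table and column names here are hypothetical, and `created_at` is assumed to be an existing `DateTime` column:

```sql
-- Changes the type, default expression, compression codec, and TTL of one column:
ALTER TABLE visits
    MODIFY COLUMN browser String DEFAULT 'unknown' CODEC(ZSTD(1)) TTL created_at + INTERVAL 30 DAY;
```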
diff --git a/docs/en/sql-reference/statements/alter/delete.md b/docs/en/sql-reference/statements/alter/delete.md index 30ed96c0b9c..b6f45b67d52 100644 --- a/docs/en/sql-reference/statements/alter/delete.md +++ b/docs/en/sql-reference/statements/alter/delete.md @@ -25,6 +25,10 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do **See also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) -- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) -- [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) +- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) +- [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting + +## Related content + +- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse) diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md index 5d7b92bd34d..7a687a067aa 100644 --- a/docs/en/sql-reference/statements/alter/index.md +++ b/docs/en/sql-reference/statements/alter/index.md @@ -8,14 +8,14 @@ sidebar_label: ALTER Most `ALTER TABLE` queries modify table settings or data: -- [COLUMN](/docs/en/sql-reference/statements/alter/column.md) -- [PARTITION](/docs/en/sql-reference/statements/alter/partition.md) -- [DELETE](/docs/en/sql-reference/statements/alter/delete.md) -- [UPDATE](/docs/en/sql-reference/statements/alter/update.md) -- [ORDER BY](/docs/en/sql-reference/statements/alter/order-by.md) -- [INDEX](/docs/en/sql-reference/statements/alter/skipping-index.md) -- [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md) -- [TTL](/docs/en/sql-reference/statements/alter/ttl.md) +- [COLUMN](/docs/en/sql-reference/statements/alter/column.md) +- [PARTITION](/docs/en/sql-reference/statements/alter/partition.md) +- [DELETE](/docs/en/sql-reference/statements/alter/delete.md) +- [UPDATE](/docs/en/sql-reference/statements/alter/update.md) +- [ORDER BY](/docs/en/sql-reference/statements/alter/order-by.md) +- [INDEX](/docs/en/sql-reference/statements/alter/skipping-index.md) +- [CONSTRAINT](/docs/en/sql-reference/statements/alter/constraint.md) +- [TTL](/docs/en/sql-reference/statements/alter/ttl.md) :::note Most `ALTER TABLE` queries are supported only for [\*MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables, as well as [Merge](/docs/en/engines/table-engines/special/merge.md) and [Distributed](/docs/en/engines/table-engines/special/distributed.md). @@ -23,16 +23,16 @@ Most `ALTER TABLE` queries are supported only for [\*MergeTree](/docs/en/engines These `ALTER` statements manipulate views: -- [ALTER TABLE ... MODIFY QUERY](/docs/en/sql-reference/statements/alter/view.md) — Modifies a [Materialized view](/docs/en/sql-reference/statements/create/view.md/#materialized) structure. -- [ALTER LIVE VIEW](/docs/en/sql-reference/statements/alter/view.md/#alter-live-view) — Refreshes a [Live view](/docs/en/sql-reference/statements/create/view.md/#live-view). +- [ALTER TABLE ... MODIFY QUERY](/docs/en/sql-reference/statements/alter/view.md) — Modifies a [Materialized view](/docs/en/sql-reference/statements/create/view.md/#materialized) structure. 
+- [ALTER LIVE VIEW](/docs/en/sql-reference/statements/alter/view.md/#alter-live-view) — Refreshes a [Live view](/docs/en/sql-reference/statements/create/view.md/#live-view). These `ALTER` statements modify entities related to role-based access control: -- [USER](/docs/en/sql-reference/statements/alter/user.md) -- [ROLE](/docs/en/sql-reference/statements/alter/role.md) -- [QUOTA](/docs/en/sql-reference/statements/alter/quota.md) -- [ROW POLICY](/docs/en/sql-reference/statements/alter/row-policy.md) -- [SETTINGS PROFILE](/docs/en/sql-reference/statements/alter/settings-profile.md) +- [USER](/docs/en/sql-reference/statements/alter/user.md) +- [ROLE](/docs/en/sql-reference/statements/alter/role.md) +- [QUOTA](/docs/en/sql-reference/statements/alter/quota.md) +- [ROW POLICY](/docs/en/sql-reference/statements/alter/row-policy.md) +- [SETTINGS PROFILE](/docs/en/sql-reference/statements/alter/settings-profile.md) [ALTER TABLE ... MODIFY COMMENT](/docs/en/sql-reference/statements/alter/comment.md) statement adds, modifies, or removes a comment on the table, regardless of whether it was set before. @@ -61,3 +61,7 @@ For all `ALTER` queries, if `alter_sync = 2` and some replicas are not active fo ::: For `ALTER TABLE ... UPDATE|DELETE` queries the synchronicity is defined by the [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting. + +## Related content + +- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse) diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index aad52efb39d..52e99d93109 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -7,20 +7,20 @@ title: "Manipulating Partitions and Parts" The following operations with [partitions](/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md) are available: -- [DETACH PARTITION\|PART](#detach-partitionpart) — Moves a partition or part to the `detached` directory and forget it. -- [DROP PARTITION\|PART](#drop-partitionpart) — Deletes a partition or part. -- [ATTACH PARTITION\|PART](#attach-partitionpart) — Adds a partition or part from the `detached` directory to the table. -- [ATTACH PARTITION FROM](#attach-partition-from) — Copies the data partition from one table to another and adds. -- [REPLACE PARTITION](#replace-partition) — Copies the data partition from one table to another and replaces. -- [MOVE PARTITION TO TABLE](#move-partition-to-table) — Moves the data partition from one table to another. -- [CLEAR COLUMN IN PARTITION](#clear-column-in-partition) — Resets the value of a specified column in a partition. -- [CLEAR INDEX IN PARTITION](#clear-index-in-partition) — Resets the specified secondary index in a partition. -- [FREEZE PARTITION](#freeze-partition) — Creates a backup of a partition. -- [UNFREEZE PARTITION](#unfreeze-partition) — Removes a backup of a partition. -- [FETCH PARTITION\|PART](#fetch-partitionpart) — Downloads a part or partition from another server. -- [MOVE PARTITION\|PART](#move-partitionpart) — Move partition/data part to another disk or volume. -- [UPDATE IN PARTITION](#update-in-partition) — Update data inside the partition by condition. -- [DELETE IN PARTITION](#delete-in-partition) — Delete data inside the partition by condition. +- [DETACH PARTITION\|PART](#detach-partitionpart) — Moves a partition or part to the `detached` directory and forgets it. 
+- [DROP PARTITION\|PART](#drop-partitionpart) — Deletes a partition or part. +- [ATTACH PARTITION\|PART](#attach-partitionpart) — Adds a partition or part from the `detached` directory to the table. +- [ATTACH PARTITION FROM](#attach-partition-from) — Copies the data partition from one table to another and adds. +- [REPLACE PARTITION](#replace-partition) — Copies the data partition from one table to another and replaces. +- [MOVE PARTITION TO TABLE](#move-partition-to-table) — Moves the data partition from one table to another. +- [CLEAR COLUMN IN PARTITION](#clear-column-in-partition) — Resets the value of a specified column in a partition. +- [CLEAR INDEX IN PARTITION](#clear-index-in-partition) — Resets the specified secondary index in a partition. +- [FREEZE PARTITION](#freeze-partition) — Creates a backup of a partition. +- [UNFREEZE PARTITION](#unfreeze-partition) — Removes a backup of a partition. +- [FETCH PARTITION\|PART](#fetch-partitionpart) — Downloads a part or partition from another server. +- [MOVE PARTITION\|PART](#move-partitionpart) — Move partition/data part to another disk or volume. +- [UPDATE IN PARTITION](#update-in-partition) — Update data inside the partition by condition. +- [DELETE IN PARTITION](#delete-in-partition) — Delete data inside the partition by condition. @@ -107,9 +107,9 @@ Note that data will be deleted neither from `table1` nor from `table2`. For the query to run successfully, the following conditions must be met: -- Both tables must have the same structure. -- Both tables must have the same partition key, the same order by key and the same primary key. -- Both tables must have the same storage policy (a disk where the partition is stored should be available for both tables). +- Both tables must have the same structure. +- Both tables must have the same partition key, the same order by key and the same primary key. +- Both tables must have the same storage policy. ## REPLACE PARTITION @@ -121,9 +121,9 @@ This query copies the data partition from the `table1` to `table2` and replaces For the query to run successfully, the following conditions must be met: -- Both tables must have the same structure. -- Both tables must have the same partition key, the same order by key and the same primary key. -- Both tables must have the same storage policy (a disk where the partition is stored should be available for both tables). +- Both tables must have the same structure. +- Both tables must have the same partition key, the same order by key and the same primary key. +- Both tables must have the same storage policy. ## MOVE PARTITION TO TABLE @@ -135,10 +135,10 @@ This query moves the data partition from the `table_source` to `table_dest` with For the query to run successfully, the following conditions must be met: -- Both tables must have the same structure. -- Both tables must have the same partition key, the same order by key and the same primary key. -- Both tables must have the same storage policy (a disk where the partition is stored should be available for both tables). -- Both tables must be the same engine family (replicated or non-replicated). +- Both tables must have the same structure. +- Both tables must have the same partition key, the same order by key and the same primary key. +- Both tables must have the same storage policy. +- Both tables must be the same engine family (replicated or non-replicated). 
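To make these conditions concrete, here is a minimal sketch; the table names `events` and `events_archive` and the partition value `202301` are hypothetical:

``` sql
-- Both tables share structure, partition key, sorting key, primary key,
-- storage policy, and engine family, so the partition can be moved.
CREATE TABLE events (d Date, id UInt64)
ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY id;

CREATE TABLE events_archive (d Date, id UInt64)
ENGINE = MergeTree PARTITION BY toYYYYMM(d) ORDER BY id;

ALTER TABLE events MOVE PARTITION 202301 TO TABLE events_archive;
```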
## CLEAR COLUMN IN PARTITION @@ -170,9 +170,9 @@ Note that for old-styled tables you can specify the prefix of the partition name At the time of execution, for a data snapshot, the query creates hardlinks to a table data. Hardlinks are placed in the directory `/var/lib/clickhouse/shadow/N/...`, where: -- `/var/lib/clickhouse/` is the working ClickHouse directory specified in the config. -- `N` is the incremental number of the backup. -- if the `WITH NAME` parameter is specified, then the value of the `'backup_name'` parameter is used instead of the incremental number. +- `/var/lib/clickhouse/` is the working ClickHouse directory specified in the config. +- `N` is the incremental number of the backup. +- if the `WITH NAME` parameter is specified, then the value of the `'backup_name'` parameter is used instead of the incremental number. :::note If you use [a set of disks for data storage in a table](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-multiple-volumes), the `shadow/N` directory appears on every disk, storing data parts that matched by the `PARTITION` expression. @@ -240,8 +240,8 @@ ALTER TABLE users ATTACH PART 201901_2_2_0; Note that: -- The `ALTER ... FETCH PARTITION|PART` query isn’t replicated. It places the part or partition to the `detached` directory only on the local server. -- The `ALTER TABLE ... ATTACH` query is replicated. It adds the data to all replicas. The data is added to one of the replicas from the `detached` directory, and to the others - from neighboring replicas. +- The `ALTER ... FETCH PARTITION|PART` query isn’t replicated. It places the part or partition to the `detached` directory only on the local server. +- The `ALTER TABLE ... ATTACH` query is replicated. It adds the data to all replicas. The data is added to one of the replicas from the `detached` directory, and to the others - from neighboring replicas. Before downloading, the system checks if the partition exists and the table structure matches. The most appropriate replica is selected automatically from the healthy replicas. @@ -257,9 +257,9 @@ ALTER TABLE table_name [ON CLUSTER cluster] MOVE PARTITION|PART partition_expr T The `ALTER TABLE t MOVE` query: -- Not replicated, because different replicas can have different storage policies. -- Returns an error if the specified disk or volume is not configured. Query also returns an error if conditions of data moving, that specified in the storage policy, can’t be applied. -- Can return an error in the case, when data to be moved is already moved by a background process, concurrent `ALTER TABLE t MOVE` query or as a result of background data merging. A user shouldn’t perform any additional actions in this case. +- Not replicated, because different replicas can have different storage policies. +- Returns an error if the specified disk or volume is not configured. Query also returns an error if conditions of data moving, that specified in the storage policy, can’t be applied. +- Can return an error in the case, when data to be moved is already moved by a background process, concurrent `ALTER TABLE t MOVE` query or as a result of background data merging. A user shouldn’t perform any additional actions in this case. 
Example: @@ -286,7 +286,7 @@ ALTER TABLE mt UPDATE x = x + 1 IN PARTITION 2 WHERE p = 2; ### See Also -- [UPDATE](/docs/en/sql-reference/statements/alter/update.md/#alter-table-update-statements) +- [UPDATE](/docs/en/sql-reference/statements/alter/update.md/#alter-table-update-statements) ## DELETE IN PARTITION @@ -306,16 +306,16 @@ ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2; ### See Also -- [DELETE](/docs/en/sql-reference/statements/alter/delete.md/#alter-mutations) +- [DELETE](/docs/en/sql-reference/statements/alter/delete.md/#alter-mutations) ## How to Set Partition Expression You can specify the partition expression in `ALTER ... PARTITION` queries in different ways: -- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`. -- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`. -- Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in a single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`. -- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. +- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`. +- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`. +- Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the names of partitions in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in a single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`. +- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. Usage of quotes when specifying the partition depends on the type of partition expression. For example, for the `String` type, you have to specify its name in quotes (`'`). For the `Date` and `Int*` types no quotes are needed. 
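The two statements below restate the quoting rules using the `visits` table from the examples above: a partition value of an integer-like type needs no quotes, while a partition ID is always a quoted string:

``` sql
-- Partition expression of an Int-like type: no quotes.
ALTER TABLE visits DETACH PARTITION 201901;
-- The same partition addressed by its string ID: single quotes required.
ALTER TABLE visits ATTACH PARTITION ID '201901';
```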
diff --git a/docs/en/sql-reference/statements/alter/setting.md b/docs/en/sql-reference/statements/alter/setting.md index f68f035146a..e18ec0cc293 100644 --- a/docs/en/sql-reference/statements/alter/setting.md +++ b/docs/en/sql-reference/statements/alter/setting.md @@ -58,4 +58,4 @@ ALTER TABLE example_table RESET SETTING max_part_loading_threads; **See Also** -- [MergeTree settings](../../../operations/settings/merge-tree-settings.md) +- [MergeTree settings](../../../operations/settings/merge-tree-settings.md) diff --git a/docs/en/sql-reference/statements/alter/skipping-index.md b/docs/en/sql-reference/statements/alter/skipping-index.md index 87124c86eac..67af76986da 100644 --- a/docs/en/sql-reference/statements/alter/skipping-index.md +++ b/docs/en/sql-reference/statements/alter/skipping-index.md @@ -10,11 +10,11 @@ sidebar_label: INDEX The following operations are available: -- `ALTER TABLE [db].table_name [ON CLUSTER cluster] ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - Adds index description to tables metadata. +- `ALTER TABLE [db].table_name [ON CLUSTER cluster] ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - Adds index description to tables metadata. -- `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). +- `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). -- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data. +- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data. The first two commands are lightweight in a sense that they only change metadata or remove files. 
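As a sketch of the three commands (the table `my_table`, the index `idx_u`, and the partition `201901` are hypothetical, and the table is assumed to be a MergeTree table):

``` sql
-- Add the index description to the table metadata (lightweight).
ALTER TABLE my_table ADD INDEX idx_u u TYPE minmax GRANULARITY 4;
-- Build the index for existing data in one partition (a mutation).
ALTER TABLE my_table MATERIALIZE INDEX idx_u IN PARTITION 201901;
-- Remove the index description and delete its files (a mutation).
ALTER TABLE my_table DROP INDEX idx_u;
```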
diff --git a/docs/en/sql-reference/statements/alter/update.md b/docs/en/sql-reference/statements/alter/update.md index 92f0f111b92..ab7d0ca7378 100644 --- a/docs/en/sql-reference/statements/alter/update.md +++ b/docs/en/sql-reference/statements/alter/update.md @@ -24,6 +24,11 @@ The synchronicity of the query processing is defined by the [mutations_sync](/do **See also** -- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) -- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) -- [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting +- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) +- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries) +- [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting + + +## Related content + +- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse) diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index 31db89164d7..8785610f58a 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -25,10 +25,10 @@ To use `ALTER USER` you must have the [ALTER USER](../../../sql-reference/statem Specifies users or roles which are allowed to receive [privileges](../../../sql-reference/statements/grant.md#grant-privileges) from this user on the condition this user has also all required access granted with [GRANT OPTION](../../../sql-reference/statements/grant.md#grant-privigele-syntax). Options of the `GRANTEES` clause: -- `user` — Specifies a user this user can grant privileges to. -- `role` — Specifies a role this user can grant privileges to. -- `ANY` — This user can grant privileges to anyone. It's the default setting. -- `NONE` — This user can grant privileges to none. +- `user` — Specifies a user this user can grant privileges to. +- `role` — Specifies a role this user can grant privileges to. +- `ANY` — This user can grant privileges to anyone. It's the default setting. +- `NONE` — This user can grant privileges to none. You can exclude any user or role by using the `EXCEPT` expression. For example, `ALTER USER user1 GRANTEES ANY EXCEPT user2`. It means if `user1` has some privileges granted with `GRANT OPTION` it will be able to grant those privileges to anyone except `user2`. diff --git a/docs/en/sql-reference/statements/check-table.md b/docs/en/sql-reference/statements/check-table.md index 8c4b8ab90a2..0209d59b018 100644 --- a/docs/en/sql-reference/statements/check-table.md +++ b/docs/en/sql-reference/statements/check-table.md @@ -16,15 +16,15 @@ The `CHECK TABLE` query compares actual file sizes with the expected values whic The query response contains the `result` column with a single row. The row has a value of [Boolean](../../sql-reference/data-types/boolean.md) type: -- 0 - The data in the table is corrupted. -- 1 - The data maintains integrity. +- 0 - The data in the table is corrupted. +- 1 - The data maintains integrity. 
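For example, checking a hypothetical table `test_table`:

``` sql
-- Returns a single row; result = 1 means the data maintains integrity.
CHECK TABLE test_table;
```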
The `CHECK TABLE` query supports the following table engines: -- [Log](../../engines/table-engines/log-family/log.md) -- [TinyLog](../../engines/table-engines/log-family/tinylog.md) -- [StripeLog](../../engines/table-engines/log-family/stripelog.md) -- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) +- [Log](../../engines/table-engines/log-family/log.md) +- [TinyLog](../../engines/table-engines/log-family/tinylog.md) +- [StripeLog](../../engines/table-engines/log-family/stripelog.md) +- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) Running the query over tables with any other table engine causes an exception. diff --git a/docs/en/sql-reference/statements/create/database.md b/docs/en/sql-reference/statements/create/database.md index 7954d1362f1..a2f5b2b9fba 100644 --- a/docs/en/sql-reference/statements/create/database.md +++ b/docs/en/sql-reference/statements/create/database.md @@ -18,8 +18,8 @@ CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(.. If the `db_name` database already exists, then ClickHouse does not create a new database and: -- Doesn’t throw an exception if clause is specified. -- Throws an exception if clause isn’t specified. +- Doesn’t throw an exception if the `IF NOT EXISTS` clause is specified. +- Throws an exception if the clause isn’t specified. ### ON CLUSTER diff --git a/docs/en/sql-reference/statements/create/function.md b/docs/en/sql-reference/statements/create/function.md index 80d20e8ccad..08946934f0c 100644 --- a/docs/en/sql-reference/statements/create/function.md +++ b/docs/en/sql-reference/statements/create/function.md @@ -4,7 +4,7 @@ sidebar_position: 38 sidebar_label: FUNCTION --- -# CREATE FUNCTION — user defined function (UDF) +# CREATE FUNCTION - user defined function (UDF) Creates a user defined function from a lambda expression. The expression must consist of function parameters, constants, operators, or other function calls. @@ -17,9 +17,9 @@ A function can have an arbitrary number of parameters. There are a few restrictions: -- The name of a function must be unique among user defined and system functions. -- Recursive functions are not allowed. -- All variables used by a function must be specified in its parameter list. +- The name of a function must be unique among user defined and system functions. +- Recursive functions are not allowed. +- All variables used by a function must be specified in its parameter list. If any restriction is violated then an exception is raised.
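A minimal sketch that respects all three restrictions (the function name `linear_equation` is illustrative):

``` sql
-- All variables (x, k, b) appear in the parameter list; no recursion.
CREATE FUNCTION linear_equation AS (x, k, b) -> k * x + b;

SELECT number, linear_equation(number, 2, 1) FROM numbers(3);
```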
diff --git a/docs/en/sql-reference/statements/create/index.md b/docs/en/sql-reference/statements/create/index.md index b9062249f65..14e29d051d7 100644 --- a/docs/en/sql-reference/statements/create/index.md +++ b/docs/en/sql-reference/statements/create/index.md @@ -8,13 +8,13 @@ sidebar_label: CREATE Create queries make a new entity of one of the following kinds: -- [DATABASE](../../../sql-reference/statements/create/database.md) -- [TABLE](../../../sql-reference/statements/create/table.md) -- [VIEW](../../../sql-reference/statements/create/view.md) -- [DICTIONARY](../../../sql-reference/statements/create/dictionary.md) -- [FUNCTION](../../../sql-reference/statements/create/function.md) -- [USER](../../../sql-reference/statements/create/user.md) -- [ROLE](../../../sql-reference/statements/create/role.md) -- [ROW POLICY](../../../sql-reference/statements/create/row-policy.md) -- [QUOTA](../../../sql-reference/statements/create/quota.md) -- [SETTINGS PROFILE](../../../sql-reference/statements/create/settings-profile.md) +- [DATABASE](../../../sql-reference/statements/create/database.md) +- [TABLE](../../../sql-reference/statements/create/table.md) +- [VIEW](../../../sql-reference/statements/create/view.md) +- [DICTIONARY](../../../sql-reference/statements/create/dictionary.md) +- [FUNCTION](../../../sql-reference/statements/create/function.md) +- [USER](../../../sql-reference/statements/create/user.md) +- [ROLE](../../../sql-reference/statements/create/role.md) +- [ROW POLICY](../../../sql-reference/statements/create/row-policy.md) +- [QUOTA](../../../sql-reference/statements/create/quota.md) +- [SETTINGS PROFILE](../../../sql-reference/statements/create/settings-profile.md) diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 3432066864f..f0101d39479 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -349,10 +349,10 @@ You can’t decompress ClickHouse database files with external utilities like `l Compression is supported for the following table engines: -- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family. Supports column compression codecs and selecting the default compression method by [compression](../../../operations/server-configuration-parameters/settings.md#server-settings-compression) settings. -- [Log](../../../engines/table-engines/log-family/index.md) family. Uses the `lz4` compression method by default and supports column compression codecs. -- [Set](../../../engines/table-engines/special/set.md). Only supported the default compression. -- [Join](../../../engines/table-engines/special/join.md). Only supported the default compression. +- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family. Supports column compression codecs and selecting the default compression method by [compression](../../../operations/server-configuration-parameters/settings.md#server-settings-compression) settings. +- [Log](../../../engines/table-engines/log-family/index.md) family. Uses the `lz4` compression method by default and supports column compression codecs. +- [Set](../../../engines/table-engines/special/set.md). Only supported the default compression. +- [Join](../../../engines/table-engines/special/join.md). Only supported the default compression. ClickHouse supports general purpose codecs and specialized codecs. 
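For illustration, a hypothetical table that chains a specialized codec with a general purpose one on a single column might look like this:

``` sql
CREATE TABLE codec_example
(
    ts DateTime,
    -- Specialized codec for float time series, followed by a general purpose codec:
    value Float64 CODEC(Gorilla, ZSTD(1))
)
ENGINE = MergeTree
ORDER BY ts;
```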
@@ -380,10 +380,10 @@ High compression levels are useful for asymmetric scenarios, like compress once, `DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply: -- DEFLATE_QPL is experimental and can only be used after setting configuration parameter `allow_experimental_codecs=1`. -- DEFLATE_QPL only works if ClickHouse was compiled with support for AVX2 or AVX512 instructions. Refer to [Build Clickhouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details. -- DEFLATE_QPL works best if the system has a Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details. -- DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with support for AVX2/AVX512 +- DEFLATE_QPL is experimental and can only be used after setting configuration parameter `allow_experimental_codecs=1`. +- DEFLATE_QPL only works if ClickHouse was compiled with support for AVX2 or AVX512 instructions. Refer to [Build Clickhouse with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Build-Clickhouse-with-DEFLATE_QPL) for more details. +- DEFLATE_QPL works best if the system has a Intel® IAA (In-Memory Analytics Accelerator) offloading device. Refer to [Accelerator Configuration](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#accelerator-configuration) and [Benchmark with DEFLATE_QPL](/docs/en/development/building_and_benchmarking_deflate_qpl.md/#Run-Benchmark-with-DEFLATE_QPL) for more details. +- DEFLATE_QPL-compressed data can only be transferred between ClickHouse nodes compiled with support for AVX2/AVX512 ### Specialized Codecs @@ -474,12 +474,12 @@ ENGINE = MergeTree ORDER BY x; ClickHouse supports temporary tables which have the following characteristics: -- Temporary tables disappear when the session ends, including if the connection is lost. -- A temporary table uses the Memory table engine when engine is not specified and it may use any table engine except Replicated and `KeeperMap` engines. -- The DB can’t be specified for a temporary table. It is created outside of databases. -- Impossible to create a temporary table with distributed DDL query on all cluster servers (by using `ON CLUSTER`): this table exists only in the current session. -- If a temporary table has the same name as another one and a query specifies the table name without specifying the DB, the temporary table will be used. -- For distributed query processing, temporary tables used in a query are passed to remote servers. +- Temporary tables disappear when the session ends, including if the connection is lost. +- A temporary table uses the Memory table engine when engine is not specified and it may use any table engine except Replicated and `KeeperMap` engines. +- The DB can’t be specified for a temporary table. It is created outside of databases. +- Impossible to create a temporary table with distributed DDL query on all cluster servers (by using `ON CLUSTER`): this table exists only in the current session. 
+- If a temporary table has the same name as another one and a query specifies the table name without specifying the DB, the temporary table will be used. +- For distributed query processing, temporary tables used in a query are passed to remote servers. To create a temporary table, use the following syntax: diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index b9bd2c1c507..3548ef7cc07 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -26,15 +26,15 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] There are multiple ways of user identification: -- `IDENTIFIED WITH no_password` -- `IDENTIFIED WITH plaintext_password BY 'qwerty'` -- `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'` -- `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'` -- `IDENTIFIED WITH double_sha1_password BY 'qwerty'` -- `IDENTIFIED WITH double_sha1_hash BY 'hash'` -- `IDENTIFIED WITH ldap SERVER 'server_name'` -- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` -- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'` +- `IDENTIFIED WITH no_password` +- `IDENTIFIED WITH plaintext_password BY 'qwerty'` +- `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'` +- `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'` +- `IDENTIFIED WITH double_sha1_password BY 'qwerty'` +- `IDENTIFIED WITH double_sha1_hash BY 'hash'` +- `IDENTIFIED WITH ldap SERVER 'server_name'` +- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` +- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'` ## Examples @@ -96,18 +96,18 @@ There are multiple ways of user identification: User host is a host from which a connection to ClickHouse server could be established. The host can be specified in the `HOST` query section in the following ways: -- `HOST IP 'ip_address_or_subnetwork'` — User can connect to ClickHouse server only from the specified IP address or a [subnetwork](https://en.wikipedia.org/wiki/Subnetwork). Examples: `HOST IP '192.168.0.0/16'`, `HOST IP '2001:DB8::/32'`. For use in production, only specify `HOST IP` elements (IP addresses and their masks), since using `host` and `host_regexp` might cause extra latency. -- `HOST ANY` — User can connect from any location. This is a default option. -- `HOST LOCAL` — User can connect only locally. -- `HOST NAME 'fqdn'` — User host can be specified as FQDN. For example, `HOST NAME 'mysite.com'`. -- `HOST REGEXP 'regexp'` — You can use [pcre](http://www.pcre.org/) regular expressions when specifying user hosts. For example, `HOST REGEXP '.*\.mysite\.com'`. -- `HOST LIKE 'template'` — Allows you to use the [LIKE](../../../sql-reference/functions/string-search-functions.md#function-like) operator to filter the user hosts. For example, `HOST LIKE '%'` is equivalent to `HOST ANY`, `HOST LIKE '%.mysite.com'` filters all the hosts in the `mysite.com` domain. +- `HOST IP 'ip_address_or_subnetwork'` — User can connect to ClickHouse server only from the specified IP address or a [subnetwork](https://en.wikipedia.org/wiki/Subnetwork). Examples: `HOST IP '192.168.0.0/16'`, `HOST IP '2001:DB8::/32'`. For use in production, only specify `HOST IP` elements (IP addresses and their masks), since using `host` and `host_regexp` might cause extra latency. 
+- `HOST ANY` — User can connect from any location. This is a default option. +- `HOST LOCAL` — User can connect only locally. +- `HOST NAME 'fqdn'` — User host can be specified as FQDN. For example, `HOST NAME 'mysite.com'`. +- `HOST REGEXP 'regexp'` — You can use [pcre](http://www.pcre.org/) regular expressions when specifying user hosts. For example, `HOST REGEXP '.*\.mysite\.com'`. +- `HOST LIKE 'template'` — Allows you to use the [LIKE](../../../sql-reference/functions/string-search-functions.md#function-like) operator to filter the user hosts. For example, `HOST LIKE '%'` is equivalent to `HOST ANY`, `HOST LIKE '%.mysite.com'` filters all the hosts in the `mysite.com` domain. Another way of specifying host is to use `@` syntax following the username. Examples: -- `CREATE USER mira@'127.0.0.1'` — Equivalent to the `HOST IP` syntax. -- `CREATE USER mira@'localhost'` — Equivalent to the `HOST LOCAL` syntax. -- `CREATE USER mira@'192.168.%.%'` — Equivalent to the `HOST LIKE` syntax. +- `CREATE USER mira@'127.0.0.1'` — Equivalent to the `HOST IP` syntax. +- `CREATE USER mira@'localhost'` — Equivalent to the `HOST LOCAL` syntax. +- `CREATE USER mira@'192.168.%.%'` — Equivalent to the `HOST LIKE` syntax. :::tip ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we do not recommend to do so. @@ -117,10 +117,10 @@ ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technica Specifies users or roles which are allowed to receive [privileges](../../../sql-reference/statements/grant.md#grant-privileges) from this user on the condition this user has also all required access granted with [GRANT OPTION](../../../sql-reference/statements/grant.md#grant-privigele-syntax). Options of the `GRANTEES` clause: -- `user` — Specifies a user this user can grant privileges to. -- `role` — Specifies a role this user can grant privileges to. -- `ANY` — This user can grant privileges to anyone. It's the default setting. -- `NONE` — This user can grant privileges to none. +- `user` — Specifies a user this user can grant privileges to. +- `role` — Specifies a role this user can grant privileges to. +- `ANY` — This user can grant privileges to anyone. It's the default setting. +- `NONE` — This user can grant privileges to none. You can exclude any user or role by using the `EXCEPT` expression. For example, `CREATE USER user1 GRANTEES ANY EXCEPT user2`. It means if `user1` has some privileges granted with `GRANT OPTION` it will be able to grant those privileges to anyone except `user2`. diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 0def42259ab..10b15638152 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -235,7 +235,7 @@ Most common uses of live view tables include: - Watching metrics from system tables using periodic refresh. 
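As a sketch of these scenarios (live views are experimental and must be enabled explicitly; the table `mytable` is hypothetical):

``` sql
SET allow_experimental_live_view = 1;

CREATE LIVE VIEW lv AS SELECT sum(x) FROM mytable;

-- Stream updated results as the underlying table changes:
WATCH lv;
```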
**See Also** -- [ALTER LIVE VIEW](../alter/view.md#alter-live-view) +- [ALTER LIVE VIEW](../alter/view.md#alter-live-view) ## Window View [Experimental] @@ -364,3 +364,4 @@ The window view is useful in the following scenarios: ## Related Content - Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) +- Blog: [Building an Observability Solution with ClickHouse - Part 2 - Traces](https://clickhouse.com/blog/storing-traces-and-spans-open-telemetry-in-clickhouse) diff --git a/docs/en/sql-reference/statements/delete.md b/docs/en/sql-reference/statements/delete.md index 149e7ab371f..fa9f08e225f 100644 --- a/docs/en/sql-reference/statements/delete.md +++ b/docs/en/sql-reference/statements/delete.md @@ -8,7 +8,7 @@ title: DELETE Statement --- ``` sql -DELETE FROM [db.]table [ON CLUSTER cluster] [WHERE expr] +DELETE FROM [db.]table [ON CLUSTER cluster] WHERE expr ``` `DELETE FROM` removes rows from the table `[db.]table` that match the expression `expr`. The deleted rows are marked as deleted immediately and will be automatically filtered out of all subsequent queries. Cleanup of data happens asynchronously in the background. This feature is only available for the MergeTree table engine family. @@ -55,3 +55,7 @@ With the described implementation now we can see what can negatively affect 'DEL - Table having a very large number of data parts - Having a lot of data in Compact parts—in a Compact part, all columns are stored in one file. + +## Related content + +- Blog: [Handling Updates and Deletes in ClickHouse](https://clickhouse.com/blog/handling-updates-and-deletes-in-clickhouse) diff --git a/docs/en/sql-reference/statements/describe-table.md b/docs/en/sql-reference/statements/describe-table.md index b9190107127..20f7061dedd 100644 --- a/docs/en/sql-reference/statements/describe-table.md +++ b/docs/en/sql-reference/statements/describe-table.md @@ -15,14 +15,14 @@ DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] The `DESCRIBE` statement returns a row for each table column with the following [String](../../sql-reference/data-types/string.md) values: -- `name` — A column name. -- `type` — A column type. -- `default_type` — A clause that is used in the column [default expression](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` or `ALIAS`. If there is no default expression, then empty string is returned. -- `default_expression` — An expression specified after the `DEFAULT` clause. -- `comment` — A [column comment](../../sql-reference/statements/alter/column.md#alter_comment-column). -- `codec_expression` — A [codec](../../sql-reference/statements/create/table.md#codecs) that is applied to the column. -- `ttl_expression` — A [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) expression. -- `is_subcolumn` — A flag that equals `1` for internal subcolumns. It is included into the result only if subcolumn description is enabled by the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting. +- `name` — A column name. +- `type` — A column type. +- `default_type` — A clause that is used in the column [default expression](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` or `ALIAS`. If there is no default expression, then empty string is returned. +- `default_expression` — An expression specified after the `DEFAULT` clause. 
+- `comment` — A [column comment](../../sql-reference/statements/alter/column.md#alter_comment-column). +- `codec_expression` — A [codec](../../sql-reference/statements/create/table.md#codecs) that is applied to the column. +- `ttl_expression` — A [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) expression. +- `is_subcolumn` — A flag that equals `1` for internal subcolumns. It is included into the result only if subcolumn description is enabled by the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting. All columns in [Nested](../../sql-reference/data-types/nested-data-structures/index.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot. @@ -66,4 +66,4 @@ The second query additionally shows subcolumns: **See Also** -- [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting. +- [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting. diff --git a/docs/en/sql-reference/statements/detach.md b/docs/en/sql-reference/statements/detach.md index 5f1513d3f44..6f08168bbef 100644 --- a/docs/en/sql-reference/statements/detach.md +++ b/docs/en/sql-reference/statements/detach.md @@ -72,5 +72,5 @@ Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table defa **See Also** -- [Materialized View](../../sql-reference/statements/create/view.md#materialized) -- [Dictionaries](../../sql-reference/dictionaries/index.md) +- [Materialized View](../../sql-reference/statements/create/view.md#materialized) +- [Dictionaries](../../sql-reference/dictionaries/index.md) diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index 8a83a8fae1d..b6208c2fd52 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -22,6 +22,10 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC] Deletes the table. +:::tip +Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md) +::: + Syntax: ``` sql diff --git a/docs/en/sql-reference/statements/exchange.md b/docs/en/sql-reference/statements/exchange.md index 33f3e08d547..babb29ae977 100644 --- a/docs/en/sql-reference/statements/exchange.md +++ b/docs/en/sql-reference/statements/exchange.md @@ -41,4 +41,4 @@ EXCHANGE DICTIONARIES [db0.]dict_A AND [db1.]dict_B [ON CLUSTER cluster] **See Also** -- [Dictionaries](../../sql-reference/dictionaries/index.md) +- [Dictionaries](../../sql-reference/dictionaries/index.md) diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md index 5081abf2fb8..1c93707402f 100644 --- a/docs/en/sql-reference/statements/explain.md +++ b/docs/en/sql-reference/statements/explain.md @@ -45,11 +45,11 @@ Union ## EXPLAIN Types -- `AST` — Abstract syntax tree. -- `SYNTAX` — Query text after AST-level optimizations. -- `QUERY TREE` — Query tree after Query Tree level optimizations. -- `PLAN` — Query execution plan. -- `PIPELINE` — Query execution pipeline. +- `AST` — Abstract syntax tree. +- `SYNTAX` — Query text after AST-level optimizations. +- `QUERY TREE` — Query tree after Query Tree level optimizations. +- `PLAN` — Query execution plan. +- `PIPELINE` — Query execution pipeline. ### EXPLAIN AST @@ -115,9 +115,9 @@ CROSS JOIN system.numbers AS c Settings: -- `run_passes` — Run all query tree passes before dumping the query tree. Defaul: `1`. 
-- `dump_passes` — Dump information about used passes before dumping the query tree. Default: `0`. -- `passes` — Specifies how many passes to run. If set to `-1`, runs all the passes. Default: `-1`. +- `run_passes` — Run all query tree passes before dumping the query tree. Default: `1`. +- `dump_passes` — Dump information about used passes before dumping the query tree. Default: `0`. +- `passes` — Specifies how many passes to run. If set to `-1`, runs all the passes. Default: `-1`. Example: ```sql @@ -143,11 +143,11 @@ Dump query plan steps. Settings: -- `header` — Prints output header for step. Default: 0. -- `description` — Prints step description. Default: 1. -- `indexes` — Shows used indexes, the number of filtered parts and the number of filtered granules for every index applied. Default: 0. Supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. -- `actions` — Prints detailed information about step actions. Default: 0. -- `json` — Prints query plan steps as a row in [JSON](../../interfaces/formats.md#json) format. Default: 0. It is recommended to use [TSVRaw](../../interfaces/formats.md#tabseparatedraw) format to avoid unnecessary escaping. +- `header` — Prints output header for step. Default: 0. +- `description` — Prints step description. Default: 1. +- `indexes` — Shows used indexes, the number of filtered parts and the number of filtered granules for every index applied. Default: 0. Supported for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. +- `actions` — Prints detailed information about step actions. Default: 0. +- `json` — Prints query plan steps as a row in [JSON](../../interfaces/formats.md#json) format. Default: 0. It is recommended to use [TSVRaw](../../interfaces/formats.md#tabseparatedraw) format to avoid unnecessary escaping. Example: @@ -276,12 +276,12 @@ EXPLAIN json = 1, description = 0, header = 1 SELECT 1, 2 + dummy; With `indexes` = 1, the `Indexes` key is added. It contains an array of used indexes. Each index is described as JSON with `Type` key (a string `MinMax`, `Partition`, `PrimaryKey` or `Skip`) and optional keys: -- `Name` — The index name (currently only used for `Skip` indexes). -- `Keys` — The array of columns used by the index. -- `Condition` — The used condition. -- `Description` — The index description (currently only used for `Skip` indexes). -- `Parts` — The number of parts before/after the index is applied. -- `Granules` — The number of granules before/after the index is applied. +- `Name` — The index name (currently only used for `Skip` indexes). +- `Keys` — The array of columns used by the index. +- `Condition` — The used condition. +- `Description` — The index description (currently only used for `Skip` indexes). +- `Parts` — The number of parts before/after the index is applied. +- `Granules` — The number of granules before/after the index is applied. Example: @@ -380,9 +380,9 @@ EXPLAIN json = 1, actions = 1, description = 0 SELECT 1 FORMAT TSVRaw; Settings: -- `header` — Prints header for each output port. Default: 0. -- `graph` — Prints a graph described in the [DOT](https://en.wikipedia.org/wiki/DOT_(graph_description_language)) graph description language. Default: 0.
+- `compact` — Prints graph in compact mode if `graph` setting is enabled. Default: 1. Example: diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md index 1d9b2c9ea30..ff66c8f6754 100644 --- a/docs/en/sql-reference/statements/grant.md +++ b/docs/en/sql-reference/statements/grant.md @@ -6,8 +6,8 @@ sidebar_label: GRANT # GRANT Statement -- Grants [privileges](#grant-privileges) to ClickHouse user accounts or roles. -- Assigns roles to user accounts or to the other roles. +- Grants [privileges](#grant-privileges) to ClickHouse user accounts or roles. +- Assigns roles to user accounts or to the other roles. To revoke privileges, use the [REVOKE](../../sql-reference/statements/revoke.md) statement. Also you can list granted privileges with the [SHOW GRANTS](../../sql-reference/statements/show.md#show-grants-statement) statement. @@ -17,9 +17,9 @@ To revoke privileges, use the [REVOKE](../../sql-reference/statements/revoke.md) GRANT [ON CLUSTER cluster_name] privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] ``` -- `privilege` — Type of privilege. -- `role` — ClickHouse user role. -- `user` — ClickHouse user account. +- `privilege` — Type of privilege. +- `role` — ClickHouse user role. +- `user` — ClickHouse user account. The `WITH GRANT OPTION` clause grants `user` or `role` with permission to execute the `GRANT` query. Users can grant privileges of the same scope they have and less. The `WITH REPLACE OPTION` clause replace old privileges by new privileges for the `user` or `role`, if is not specified it appends privileges. @@ -30,12 +30,24 @@ The `WITH REPLACE OPTION` clause replace old privileges by new privileges for th GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_USER} [,...] [WITH ADMIN OPTION] [WITH REPLACE OPTION] ``` -- `role` — ClickHouse user role. -- `user` — ClickHouse user account. +- `role` — ClickHouse user role. +- `user` — ClickHouse user account. The `WITH ADMIN OPTION` clause grants [ADMIN OPTION](#admin-option-privilege) privilege to `user` or `role`. The `WITH REPLACE OPTION` clause replace old roles by new role for the `user` or `role`, if is not specified it appends roles. +## Grant Current Grants Syntax +``` sql +GRANT CURRENT GRANTS{(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*}) | ON {db.table|db.*|*.*|table|*}} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION] +``` + +- `privilege` — Type of privilege. +- `role` — ClickHouse user role. +- `user` — ClickHouse user account. + +Using the `CURRENT GRANTS` statement allows you to give all specified privileges to the given user or role. +If none of the privileges were specified, then the given user or role will receive all available privileges for `CURRENT_USER`. + ## Usage To use `GRANT`, your account must have the `GRANT OPTION` privilege. You can grant privileges only inside the scope of your account privileges. @@ -48,9 +60,9 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION It means that `john` has the permission to execute: -- `SELECT x,y FROM db.table`. -- `SELECT x FROM db.table`. -- `SELECT y FROM db.table`. +- `SELECT x,y FROM db.table`. +- `SELECT x FROM db.table`. +- `SELECT y FROM db.table`. `john` can’t execute `SELECT z FROM db.table`. The `SELECT * FROM db.table` also is not available. Processing this query, ClickHouse does not return any data, even `x` and `y`. 
The only exception is if a table contains only `x` and `y` columns. In this case ClickHouse returns all the data. @@ -70,151 +82,151 @@ Privileges have a hierarchical structure. A set of permitted queries depends on Hierarchy of privileges: -- [SELECT](#grant-select) -- [INSERT](#grant-insert) -- [ALTER](#grant-alter) - - `ALTER TABLE` - - `ALTER UPDATE` - - `ALTER DELETE` - - `ALTER COLUMN` - - `ALTER ADD COLUMN` - - `ALTER DROP COLUMN` - - `ALTER MODIFY COLUMN` - - `ALTER COMMENT COLUMN` - - `ALTER CLEAR COLUMN` - - `ALTER RENAME COLUMN` - - `ALTER INDEX` - - `ALTER ORDER BY` - - `ALTER SAMPLE BY` - - `ALTER ADD INDEX` - - `ALTER DROP INDEX` - - `ALTER MATERIALIZE INDEX` - - `ALTER CLEAR INDEX` - - `ALTER CONSTRAINT` - - `ALTER ADD CONSTRAINT` - - `ALTER DROP CONSTRAINT` - - `ALTER TTL` - - `ALTER MATERIALIZE TTL` - - `ALTER SETTINGS` - - `ALTER MOVE PARTITION` - - `ALTER FETCH PARTITION` - - `ALTER FREEZE PARTITION` - - `ALTER VIEW` - - `ALTER VIEW REFRESH` - - `ALTER VIEW MODIFY QUERY` -- [CREATE](#grant-create) - - `CREATE DATABASE` - - `CREATE TABLE` - - `CREATE ARBITRARY TEMPORARY TABLE` - - `CREATE TEMPORARY TABLE` - - `CREATE VIEW` - - `CREATE DICTIONARY` - - `CREATE FUNCTION` -- [DROP](#grant-drop) - - `DROP DATABASE` - - `DROP TABLE` - - `DROP VIEW` - - `DROP DICTIONARY` - - `DROP FUNCTION` -- [TRUNCATE](#grant-truncate) -- [OPTIMIZE](#grant-optimize) -- [SHOW](#grant-show) - - `SHOW DATABASES` - - `SHOW TABLES` - - `SHOW COLUMNS` - - `SHOW DICTIONARIES` -- [KILL QUERY](#grant-kill-query) -- [ACCESS MANAGEMENT](#grant-access-management) - - `CREATE USER` - - `ALTER USER` - - `DROP USER` - - `CREATE ROLE` - - `ALTER ROLE` - - `DROP ROLE` - - `CREATE ROW POLICY` - - `ALTER ROW POLICY` - - `DROP ROW POLICY` - - `CREATE QUOTA` - - `ALTER QUOTA` - - `DROP QUOTA` - - `CREATE SETTINGS PROFILE` - - `ALTER SETTINGS PROFILE` - - `DROP SETTINGS PROFILE` - - `SHOW ACCESS` - - `SHOW_USERS` - - `SHOW_ROLES` - - `SHOW_ROW_POLICIES` - - `SHOW_QUOTAS` - - `SHOW_SETTINGS_PROFILES` - - `ROLE ADMIN` -- [SYSTEM](#grant-system) - - `SYSTEM SHUTDOWN` - - `SYSTEM DROP CACHE` - - `SYSTEM DROP DNS CACHE` - - `SYSTEM DROP MARK CACHE` - - `SYSTEM DROP UNCOMPRESSED CACHE` - - `SYSTEM RELOAD` - - `SYSTEM RELOAD CONFIG` - - `SYSTEM RELOAD DICTIONARY` - - `SYSTEM RELOAD EMBEDDED DICTIONARIES` - - `SYSTEM RELOAD FUNCTION` - - `SYSTEM RELOAD FUNCTIONS` - - `SYSTEM MERGES` - - `SYSTEM TTL MERGES` - - `SYSTEM FETCHES` - - `SYSTEM MOVES` - - `SYSTEM SENDS` - - `SYSTEM DISTRIBUTED SENDS` - - `SYSTEM REPLICATED SENDS` - - `SYSTEM REPLICATION QUEUES` - - `SYSTEM SYNC REPLICA` - - `SYSTEM RESTART REPLICA` - - `SYSTEM FLUSH` - - `SYSTEM FLUSH DISTRIBUTED` - - `SYSTEM FLUSH LOGS` - - `CLUSTER` (see also `access_control_improvements.on_cluster_queries_require_cluster_grant` configuration directive) -- [INTROSPECTION](#grant-introspection) - - `addressToLine` - - `addressToLineWithInlines` - - `addressToSymbol` - - `demangle` -- [SOURCES](#grant-sources) - - `FILE` - - `URL` - - `REMOTE` - - `YSQL` - - `ODBC` - - `JDBC` - - `HDFS` - - `S3` -- [dictGet](#grant-dictget) +- [SELECT](#grant-select) +- [INSERT](#grant-insert) +- [ALTER](#grant-alter) + - `ALTER TABLE` + - `ALTER UPDATE` + - `ALTER DELETE` + - `ALTER COLUMN` + - `ALTER ADD COLUMN` + - `ALTER DROP COLUMN` + - `ALTER MODIFY COLUMN` + - `ALTER COMMENT COLUMN` + - `ALTER CLEAR COLUMN` + - `ALTER RENAME COLUMN` + - `ALTER INDEX` + - `ALTER ORDER BY` + - `ALTER SAMPLE BY` + - `ALTER ADD INDEX` + - `ALTER DROP INDEX` + - `ALTER MATERIALIZE INDEX` + - `ALTER CLEAR 
INDEX` + - `ALTER CONSTRAINT` + - `ALTER ADD CONSTRAINT` + - `ALTER DROP CONSTRAINT` + - `ALTER TTL` + - `ALTER MATERIALIZE TTL` + - `ALTER SETTINGS` + - `ALTER MOVE PARTITION` + - `ALTER FETCH PARTITION` + - `ALTER FREEZE PARTITION` + - `ALTER VIEW` + - `ALTER VIEW REFRESH` + - `ALTER VIEW MODIFY QUERY` +- [CREATE](#grant-create) + - `CREATE DATABASE` + - `CREATE TABLE` + - `CREATE ARBITRARY TEMPORARY TABLE` + - `CREATE TEMPORARY TABLE` + - `CREATE VIEW` + - `CREATE DICTIONARY` + - `CREATE FUNCTION` +- [DROP](#grant-drop) + - `DROP DATABASE` + - `DROP TABLE` + - `DROP VIEW` + - `DROP DICTIONARY` + - `DROP FUNCTION` +- [TRUNCATE](#grant-truncate) +- [OPTIMIZE](#grant-optimize) +- [SHOW](#grant-show) + - `SHOW DATABASES` + - `SHOW TABLES` + - `SHOW COLUMNS` + - `SHOW DICTIONARIES` +- [KILL QUERY](#grant-kill-query) +- [ACCESS MANAGEMENT](#grant-access-management) + - `CREATE USER` + - `ALTER USER` + - `DROP USER` + - `CREATE ROLE` + - `ALTER ROLE` + - `DROP ROLE` + - `CREATE ROW POLICY` + - `ALTER ROW POLICY` + - `DROP ROW POLICY` + - `CREATE QUOTA` + - `ALTER QUOTA` + - `DROP QUOTA` + - `CREATE SETTINGS PROFILE` + - `ALTER SETTINGS PROFILE` + - `DROP SETTINGS PROFILE` + - `SHOW ACCESS` + - `SHOW_USERS` + - `SHOW_ROLES` + - `SHOW_ROW_POLICIES` + - `SHOW_QUOTAS` + - `SHOW_SETTINGS_PROFILES` + - `ROLE ADMIN` +- [SYSTEM](#grant-system) + - `SYSTEM SHUTDOWN` + - `SYSTEM DROP CACHE` + - `SYSTEM DROP DNS CACHE` + - `SYSTEM DROP MARK CACHE` + - `SYSTEM DROP UNCOMPRESSED CACHE` + - `SYSTEM RELOAD` + - `SYSTEM RELOAD CONFIG` + - `SYSTEM RELOAD DICTIONARY` + - `SYSTEM RELOAD EMBEDDED DICTIONARIES` + - `SYSTEM RELOAD FUNCTION` + - `SYSTEM RELOAD FUNCTIONS` + - `SYSTEM MERGES` + - `SYSTEM TTL MERGES` + - `SYSTEM FETCHES` + - `SYSTEM MOVES` + - `SYSTEM SENDS` + - `SYSTEM DISTRIBUTED SENDS` + - `SYSTEM REPLICATED SENDS` + - `SYSTEM REPLICATION QUEUES` + - `SYSTEM SYNC REPLICA` + - `SYSTEM RESTART REPLICA` + - `SYSTEM FLUSH` + - `SYSTEM FLUSH DISTRIBUTED` + - `SYSTEM FLUSH LOGS` + - `CLUSTER` (see also `access_control_improvements.on_cluster_queries_require_cluster_grant` configuration directive) +- [INTROSPECTION](#grant-introspection) + - `addressToLine` + - `addressToLineWithInlines` + - `addressToSymbol` + - `demangle` +- [SOURCES](#grant-sources) + - `FILE` + - `URL` + - `REMOTE` + - `YSQL` + - `ODBC` + - `JDBC` + - `HDFS` + - `S3` +- [dictGet](#grant-dictget) Examples of how this hierarchy is treated: -- The `ALTER` privilege includes all other `ALTER*` privileges. -- `ALTER CONSTRAINT` includes `ALTER ADD CONSTRAINT` and `ALTER DROP CONSTRAINT` privileges. +- The `ALTER` privilege includes all other `ALTER*` privileges. +- `ALTER CONSTRAINT` includes `ALTER ADD CONSTRAINT` and `ALTER DROP CONSTRAINT` privileges. Privileges are applied at different levels. Knowing of a level suggests syntax available for privilege. Levels (from lower to higher): -- `COLUMN` — Privilege can be granted for column, table, database, or globally. -- `TABLE` — Privilege can be granted for table, database, or globally. -- `VIEW` — Privilege can be granted for view, database, or globally. -- `DICTIONARY` — Privilege can be granted for dictionary, database, or globally. -- `DATABASE` — Privilege can be granted for database or globally. -- `GLOBAL` — Privilege can be granted only globally. -- `GROUP` — Groups privileges of different levels. When `GROUP`-level privilege is granted, only that privileges from the group are granted which correspond to the used syntax. 
+- `COLUMN` — Privilege can be granted for column, table, database, or globally. +- `TABLE` — Privilege can be granted for table, database, or globally. +- `VIEW` — Privilege can be granted for view, database, or globally. +- `DICTIONARY` — Privilege can be granted for dictionary, database, or globally. +- `DATABASE` — Privilege can be granted for database or globally. +- `GLOBAL` — Privilege can be granted only globally. +- `GROUP` — Groups privileges of different levels. When `GROUP`-level privilege is granted, only that privileges from the group are granted which correspond to the used syntax. Examples of allowed syntax: -- `GRANT SELECT(x) ON db.table TO user` -- `GRANT SELECT ON db.* TO user` +- `GRANT SELECT(x) ON db.table TO user` +- `GRANT SELECT ON db.* TO user` Examples of disallowed syntax: -- `GRANT CREATE USER(x) ON db.table TO user` -- `GRANT CREATE USER ON db.* TO user` +- `GRANT CREATE USER(x) ON db.table TO user` +- `GRANT CREATE USER ON db.* TO user` The special privilege [ALL](#grant-all) grants all the privileges to a user account or a role. @@ -264,74 +276,74 @@ The granted privilege allows `john` to insert data to the `x` and/or `y` columns Allows executing [ALTER](../../sql-reference/statements/alter/index.md) queries according to the following hierarchy of privileges: -- `ALTER`. Level: `COLUMN`. - - `ALTER TABLE`. Level: `GROUP` - - `ALTER UPDATE`. Level: `COLUMN`. Aliases: `UPDATE` - - `ALTER DELETE`. Level: `COLUMN`. Aliases: `DELETE` - - `ALTER COLUMN`. Level: `GROUP` - - `ALTER ADD COLUMN`. Level: `COLUMN`. Aliases: `ADD COLUMN` - - `ALTER DROP COLUMN`. Level: `COLUMN`. Aliases: `DROP COLUMN` - - `ALTER MODIFY COLUMN`. Level: `COLUMN`. Aliases: `MODIFY COLUMN` - - `ALTER COMMENT COLUMN`. Level: `COLUMN`. Aliases: `COMMENT COLUMN` - - `ALTER CLEAR COLUMN`. Level: `COLUMN`. Aliases: `CLEAR COLUMN` - - `ALTER RENAME COLUMN`. Level: `COLUMN`. Aliases: `RENAME COLUMN` - - `ALTER INDEX`. Level: `GROUP`. Aliases: `INDEX` - - `ALTER ORDER BY`. Level: `TABLE`. Aliases: `ALTER MODIFY ORDER BY`, `MODIFY ORDER BY` - - `ALTER SAMPLE BY`. Level: `TABLE`. Aliases: `ALTER MODIFY SAMPLE BY`, `MODIFY SAMPLE BY` - - `ALTER ADD INDEX`. Level: `TABLE`. Aliases: `ADD INDEX` - - `ALTER DROP INDEX`. Level: `TABLE`. Aliases: `DROP INDEX` - - `ALTER MATERIALIZE INDEX`. Level: `TABLE`. Aliases: `MATERIALIZE INDEX` - - `ALTER CLEAR INDEX`. Level: `TABLE`. Aliases: `CLEAR INDEX` - - `ALTER CONSTRAINT`. Level: `GROUP`. Aliases: `CONSTRAINT` - - `ALTER ADD CONSTRAINT`. Level: `TABLE`. Aliases: `ADD CONSTRAINT` - - `ALTER DROP CONSTRAINT`. Level: `TABLE`. Aliases: `DROP CONSTRAINT` - - `ALTER TTL`. Level: `TABLE`. Aliases: `ALTER MODIFY TTL`, `MODIFY TTL` - - `ALTER MATERIALIZE TTL`. Level: `TABLE`. Aliases: `MATERIALIZE TTL` - - `ALTER SETTINGS`. Level: `TABLE`. Aliases: `ALTER SETTING`, `ALTER MODIFY SETTING`, `MODIFY SETTING` - - `ALTER MOVE PARTITION`. Level: `TABLE`. Aliases: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART` - - `ALTER FETCH PARTITION`. Level: `TABLE`. Aliases: `ALTER FETCH PART`, `FETCH PARTITION`, `FETCH PART` - - `ALTER FREEZE PARTITION`. Level: `TABLE`. Aliases: `FREEZE PARTITION` - - `ALTER VIEW` Level: `GROUP` - - `ALTER VIEW REFRESH`. Level: `VIEW`. Aliases: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW` - - `ALTER VIEW MODIFY QUERY`. Level: `VIEW`. Aliases: `ALTER TABLE MODIFY QUERY` +- `ALTER`. Level: `COLUMN`. + - `ALTER TABLE`. Level: `GROUP` + - `ALTER UPDATE`. Level: `COLUMN`. Aliases: `UPDATE` + - `ALTER DELETE`. Level: `COLUMN`. 
Aliases: `DELETE` + - `ALTER COLUMN`. Level: `GROUP` + - `ALTER ADD COLUMN`. Level: `COLUMN`. Aliases: `ADD COLUMN` + - `ALTER DROP COLUMN`. Level: `COLUMN`. Aliases: `DROP COLUMN` + - `ALTER MODIFY COLUMN`. Level: `COLUMN`. Aliases: `MODIFY COLUMN` + - `ALTER COMMENT COLUMN`. Level: `COLUMN`. Aliases: `COMMENT COLUMN` + - `ALTER CLEAR COLUMN`. Level: `COLUMN`. Aliases: `CLEAR COLUMN` + - `ALTER RENAME COLUMN`. Level: `COLUMN`. Aliases: `RENAME COLUMN` + - `ALTER INDEX`. Level: `GROUP`. Aliases: `INDEX` + - `ALTER ORDER BY`. Level: `TABLE`. Aliases: `ALTER MODIFY ORDER BY`, `MODIFY ORDER BY` + - `ALTER SAMPLE BY`. Level: `TABLE`. Aliases: `ALTER MODIFY SAMPLE BY`, `MODIFY SAMPLE BY` + - `ALTER ADD INDEX`. Level: `TABLE`. Aliases: `ADD INDEX` + - `ALTER DROP INDEX`. Level: `TABLE`. Aliases: `DROP INDEX` + - `ALTER MATERIALIZE INDEX`. Level: `TABLE`. Aliases: `MATERIALIZE INDEX` + - `ALTER CLEAR INDEX`. Level: `TABLE`. Aliases: `CLEAR INDEX` + - `ALTER CONSTRAINT`. Level: `GROUP`. Aliases: `CONSTRAINT` + - `ALTER ADD CONSTRAINT`. Level: `TABLE`. Aliases: `ADD CONSTRAINT` + - `ALTER DROP CONSTRAINT`. Level: `TABLE`. Aliases: `DROP CONSTRAINT` + - `ALTER TTL`. Level: `TABLE`. Aliases: `ALTER MODIFY TTL`, `MODIFY TTL` + - `ALTER MATERIALIZE TTL`. Level: `TABLE`. Aliases: `MATERIALIZE TTL` + - `ALTER SETTINGS`. Level: `TABLE`. Aliases: `ALTER SETTING`, `ALTER MODIFY SETTING`, `MODIFY SETTING` + - `ALTER MOVE PARTITION`. Level: `TABLE`. Aliases: `ALTER MOVE PART`, `MOVE PARTITION`, `MOVE PART` + - `ALTER FETCH PARTITION`. Level: `TABLE`. Aliases: `ALTER FETCH PART`, `FETCH PARTITION`, `FETCH PART` + - `ALTER FREEZE PARTITION`. Level: `TABLE`. Aliases: `FREEZE PARTITION` + - `ALTER VIEW` Level: `GROUP` + - `ALTER VIEW REFRESH`. Level: `VIEW`. Aliases: `ALTER LIVE VIEW REFRESH`, `REFRESH VIEW` + - `ALTER VIEW MODIFY QUERY`. Level: `VIEW`. Aliases: `ALTER TABLE MODIFY QUERY` Examples of how this hierarchy is treated: -- The `ALTER` privilege includes all other `ALTER*` privileges. -- `ALTER CONSTRAINT` includes `ALTER ADD CONSTRAINT` and `ALTER DROP CONSTRAINT` privileges. +- The `ALTER` privilege includes all other `ALTER*` privileges. +- `ALTER CONSTRAINT` includes `ALTER ADD CONSTRAINT` and `ALTER DROP CONSTRAINT` privileges. **Notes** -- The `MODIFY SETTING` privilege allows modifying table engine settings. It does not affect settings or server configuration parameters. -- The `ATTACH` operation needs the [CREATE](#grant-create) privilege. -- The `DETACH` operation needs the [DROP](#grant-drop) privilege. -- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/kill.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege. +- The `MODIFY SETTING` privilege allows modifying table engine settings. It does not affect settings or server configuration parameters. +- The `ATTACH` operation needs the [CREATE](#grant-create) privilege. +- The `DETACH` operation needs the [DROP](#grant-drop) privilege. +- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/kill.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege. 
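For example, granting only the narrow privilege needed to run (and, per the note above, kill) update mutations on one table, reusing the `db.table` and `john` names from the earlier examples:

``` sql
-- `ALTER UPDATE` is on the COLUMN level, so it could also be granted
-- for specific columns, a whole database, or globally.
GRANT ALTER UPDATE ON db.table TO john;
```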
### CREATE
Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [ATTACH](../../sql-reference/statements/attach.md) DDL-queries according to the following hierarchy of privileges:
-- `CREATE`. Level: `GROUP`
- - `CREATE DATABASE`. Level: `DATABASE`
- - `CREATE TABLE`. Level: `TABLE`
- - `CREATE ARBITRARY TEMPORARY TABLE`. Level: `GLOBAL`
- - `CREATE TEMPORARY TABLE`. Level: `GLOBAL`
- - `CREATE VIEW`. Level: `VIEW`
- - `CREATE DICTIONARY`. Level: `DICTIONARY`
+- `CREATE`. Level: `GROUP`
+ - `CREATE DATABASE`. Level: `DATABASE`
+ - `CREATE TABLE`. Level: `TABLE`
+ - `CREATE ARBITRARY TEMPORARY TABLE`. Level: `GLOBAL`
+ - `CREATE TEMPORARY TABLE`. Level: `GLOBAL`
+ - `CREATE VIEW`. Level: `VIEW`
+ - `CREATE DICTIONARY`. Level: `DICTIONARY`
**Notes**
-- To delete the created table, a user needs [DROP](#grant-drop).
+- To delete the created table, a user needs [DROP](#grant-drop).
### DROP
Allows executing [DROP](../../sql-reference/statements/drop.md) and [DETACH](../../sql-reference/statements/detach.md) queries according to the following hierarchy of privileges:
-- `DROP`. Level: `GROUP`
- - `DROP DATABASE`. Level: `DATABASE`
- - `DROP TABLE`. Level: `TABLE`
- - `DROP VIEW`. Level: `VIEW`
- - `DROP DICTIONARY`. Level: `DICTIONARY`
+- `DROP`. Level: `GROUP`
+ - `DROP DATABASE`. Level: `DATABASE`
+ - `DROP TABLE`. Level: `TABLE`
+ - `DROP VIEW`. Level: `VIEW`
+ - `DROP DICTIONARY`. Level: `DICTIONARY`
### TRUNCATE
@@ -349,11 +361,11 @@ Privilege level: `TABLE`.
Allows executing `SHOW`, `DESCRIBE`, `USE`, and `EXISTS` queries according to the following hierarchy of privileges:
-- `SHOW`. Level: `GROUP`
- - `SHOW DATABASES`. Level: `DATABASE`. Allows to execute `SHOW DATABASES`, `SHOW CREATE DATABASE`, `USE ` queries.
- - `SHOW TABLES`. Level: `TABLE`. Allows to execute `SHOW TABLES`, `EXISTS `, `CHECK ` queries.
- - `SHOW COLUMNS`. Level: `COLUMN`. Allows to execute `SHOW CREATE TABLE`, `DESCRIBE` queries.
- - `SHOW DICTIONARIES`. Level: `DICTIONARY`. Allows to execute `SHOW DICTIONARIES`, `SHOW CREATE DICTIONARY`, `EXISTS ` queries.
+- `SHOW`. Level: `GROUP`
+ - `SHOW DATABASES`. Level: `DATABASE`. Allows to execute `SHOW DATABASES`, `SHOW CREATE DATABASE`, `USE ` queries.
+ - `SHOW TABLES`. Level: `TABLE`. Allows to execute `SHOW TABLES`, `EXISTS `, `CHECK ` queries.
+ - `SHOW COLUMNS`. Level: `COLUMN`. Allows to execute `SHOW CREATE TABLE`, `DESCRIBE` queries.
+ - `SHOW DICTIONARIES`. Level: `DICTIONARY`. Allows to execute `SHOW DICTIONARIES`, `SHOW CREATE DICTIONARY`, `EXISTS ` queries.
**Notes**
@@ -373,29 +385,29 @@ Privilege level: `GLOBAL`.
Allows a user to execute queries that manage users, roles and row policies.
-- `ACCESS MANAGEMENT`. Level: `GROUP`
- - `CREATE USER`. Level: `GLOBAL`
- - `ALTER USER`. Level: `GLOBAL`
- - `DROP USER`. Level: `GLOBAL`
- - `CREATE ROLE`. Level: `GLOBAL`
- - `ALTER ROLE`. Level: `GLOBAL`
- - `DROP ROLE`. Level: `GLOBAL`
- - `ROLE ADMIN`. Level: `GLOBAL`
- - `CREATE ROW POLICY`. Level: `GLOBAL`. Aliases: `CREATE POLICY`
- - `ALTER ROW POLICY`. Level: `GLOBAL`. Aliases: `ALTER POLICY`
- - `DROP ROW POLICY`. Level: `GLOBAL`. Aliases: `DROP POLICY`
- - `CREATE QUOTA`. Level: `GLOBAL`
- - `ALTER QUOTA`. Level: `GLOBAL`
- - `DROP QUOTA`. Level: `GLOBAL`
- - `CREATE SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `CREATE PROFILE`
- - `ALTER SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `ALTER PROFILE`
- - `DROP SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `DROP PROFILE`
- - `SHOW ACCESS`. Level: `GROUP`
- - `SHOW_USERS`. Level: `GLOBAL`. Aliases: `SHOW CREATE USER`
- - `SHOW_ROLES`. Level: `GLOBAL`. Aliases: `SHOW CREATE ROLE`
- - `SHOW_ROW_POLICIES`. Level: `GLOBAL`. Aliases: `SHOW POLICIES`, `SHOW CREATE ROW POLICY`, `SHOW CREATE POLICY`
- - `SHOW_QUOTAS`. Level: `GLOBAL`. Aliases: `SHOW CREATE QUOTA`
- - `SHOW_SETTINGS_PROFILES`. Level: `GLOBAL`. Aliases: `SHOW PROFILES`, `SHOW CREATE SETTINGS PROFILE`, `SHOW CREATE PROFILE`
+- `ACCESS MANAGEMENT`. Level: `GROUP`
+ - `CREATE USER`. Level: `GLOBAL`
+ - `ALTER USER`. Level: `GLOBAL`
+ - `DROP USER`. Level: `GLOBAL`
+ - `CREATE ROLE`. Level: `GLOBAL`
+ - `ALTER ROLE`. Level: `GLOBAL`
+ - `DROP ROLE`. Level: `GLOBAL`
+ - `ROLE ADMIN`. Level: `GLOBAL`
+ - `CREATE ROW POLICY`. Level: `GLOBAL`. Aliases: `CREATE POLICY`
+ - `ALTER ROW POLICY`. Level: `GLOBAL`. Aliases: `ALTER POLICY`
+ - `DROP ROW POLICY`. Level: `GLOBAL`. Aliases: `DROP POLICY`
+ - `CREATE QUOTA`. Level: `GLOBAL`
+ - `ALTER QUOTA`. Level: `GLOBAL`
+ - `DROP QUOTA`. Level: `GLOBAL`
+ - `CREATE SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `CREATE PROFILE`
+ - `ALTER SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `ALTER PROFILE`
+ - `DROP SETTINGS PROFILE`. Level: `GLOBAL`. Aliases: `DROP PROFILE`
+ - `SHOW ACCESS`. Level: `GROUP`
+ - `SHOW_USERS`. Level: `GLOBAL`. Aliases: `SHOW CREATE USER`
+ - `SHOW_ROLES`. Level: `GLOBAL`. Aliases: `SHOW CREATE ROLE`
+ - `SHOW_ROW_POLICIES`. Level: `GLOBAL`. Aliases: `SHOW POLICIES`, `SHOW CREATE ROW POLICY`, `SHOW CREATE POLICY`
+ - `SHOW_QUOTAS`. Level: `GLOBAL`. Aliases: `SHOW CREATE QUOTA`
+ - `SHOW_SETTINGS_PROFILES`. Level: `GLOBAL`. Aliases: `SHOW PROFILES`, `SHOW CREATE SETTINGS PROFILE`, `SHOW CREATE PROFILE`
The `ROLE ADMIN` privilege allows a user to assign and revoke any roles including those which are not assigned to the user with the admin option.
@@ -403,29 +415,29 @@ The `ROLE ADMIN` privilege allows a user to assign and revoke any roles includin
Allows a user to execute [SYSTEM](../../sql-reference/statements/system.md) queries according to the following hierarchy of privileges.
-- `SYSTEM`. Level: `GROUP`
- - `SYSTEM SHUTDOWN`. Level: `GLOBAL`. Aliases: `SYSTEM KILL`, `SHUTDOWN`
- - `SYSTEM DROP CACHE`. Aliases: `DROP CACHE`
- - `SYSTEM DROP DNS CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP DNS`, `DROP DNS CACHE`, `DROP DNS`
- - `SYSTEM DROP MARK CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP MARK`, `DROP MARK CACHE`, `DROP MARKS`
- - `SYSTEM DROP UNCOMPRESSED CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP UNCOMPRESSED`, `DROP UNCOMPRESSED CACHE`, `DROP UNCOMPRESSED`
- - `SYSTEM RELOAD`. Level: `GROUP`
- - `SYSTEM RELOAD CONFIG`. Level: `GLOBAL`. Aliases: `RELOAD CONFIG`
- - `SYSTEM RELOAD DICTIONARY`. Level: `GLOBAL`. Aliases: `SYSTEM RELOAD DICTIONARIES`, `RELOAD DICTIONARY`, `RELOAD DICTIONARIES`
- - `SYSTEM RELOAD EMBEDDED DICTIONARIES`. Level: `GLOBAL`. Aliases: `RELOAD EMBEDDED DICTIONARIES`
- - `SYSTEM MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP MERGES`, `SYSTEM START MERGES`, `STOP MERGES`, `START MERGES`
- - `SYSTEM TTL MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP TTL MERGES`, `SYSTEM START TTL MERGES`, `STOP TTL MERGES`, `START TTL MERGES`
- - `SYSTEM FETCHES`. Level: `TABLE`. Aliases: `SYSTEM STOP FETCHES`, `SYSTEM START FETCHES`, `STOP FETCHES`, `START FETCHES`
- - `SYSTEM MOVES`. Level: `TABLE`. Aliases: `SYSTEM STOP MOVES`, `SYSTEM START MOVES`, `STOP MOVES`, `START MOVES`
- - `SYSTEM SENDS`. Level: `GROUP`. Aliases: `SYSTEM STOP SENDS`, `SYSTEM START SENDS`, `STOP SENDS`, `START SENDS`
- - `SYSTEM DISTRIBUTED SENDS`. Level: `TABLE`. Aliases: `SYSTEM STOP DISTRIBUTED SENDS`, `SYSTEM START DISTRIBUTED SENDS`, `STOP DISTRIBUTED SENDS`, `START DISTRIBUTED SENDS`
- - `SYSTEM REPLICATED SENDS`. Level: `TABLE`. Aliases: `SYSTEM STOP REPLICATED SENDS`, `SYSTEM START REPLICATED SENDS`, `STOP REPLICATED SENDS`, `START REPLICATED SENDS`
- - `SYSTEM REPLICATION QUEUES`. Level: `TABLE`. Aliases: `SYSTEM STOP REPLICATION QUEUES`, `SYSTEM START REPLICATION QUEUES`, `STOP REPLICATION QUEUES`, `START REPLICATION QUEUES`
- - `SYSTEM SYNC REPLICA`. Level: `TABLE`. Aliases: `SYNC REPLICA`
- - `SYSTEM RESTART REPLICA`. Level: `TABLE`. Aliases: `RESTART REPLICA`
- - `SYSTEM FLUSH`. Level: `GROUP`
- - `SYSTEM FLUSH DISTRIBUTED`. Level: `TABLE`. Aliases: `FLUSH DISTRIBUTED`
- - `SYSTEM FLUSH LOGS`. Level: `GLOBAL`. Aliases: `FLUSH LOGS`
+- `SYSTEM`. Level: `GROUP`
+ - `SYSTEM SHUTDOWN`. Level: `GLOBAL`. Aliases: `SYSTEM KILL`, `SHUTDOWN`
+ - `SYSTEM DROP CACHE`. Aliases: `DROP CACHE`
+ - `SYSTEM DROP DNS CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP DNS`, `DROP DNS CACHE`, `DROP DNS`
+ - `SYSTEM DROP MARK CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP MARK`, `DROP MARK CACHE`, `DROP MARKS`
+ - `SYSTEM DROP UNCOMPRESSED CACHE`. Level: `GLOBAL`. Aliases: `SYSTEM DROP UNCOMPRESSED`, `DROP UNCOMPRESSED CACHE`, `DROP UNCOMPRESSED`
+ - `SYSTEM RELOAD`. Level: `GROUP`
+ - `SYSTEM RELOAD CONFIG`. Level: `GLOBAL`. Aliases: `RELOAD CONFIG`
+ - `SYSTEM RELOAD DICTIONARY`. Level: `GLOBAL`. Aliases: `SYSTEM RELOAD DICTIONARIES`, `RELOAD DICTIONARY`, `RELOAD DICTIONARIES`
+ - `SYSTEM RELOAD EMBEDDED DICTIONARIES`. Level: `GLOBAL`. Aliases: `RELOAD EMBEDDED DICTIONARIES`
+ - `SYSTEM MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP MERGES`, `SYSTEM START MERGES`, `STOP MERGES`, `START MERGES`
+ - `SYSTEM TTL MERGES`. Level: `TABLE`. Aliases: `SYSTEM STOP TTL MERGES`, `SYSTEM START TTL MERGES`, `STOP TTL MERGES`, `START TTL MERGES`
+ - `SYSTEM FETCHES`. Level: `TABLE`. Aliases: `SYSTEM STOP FETCHES`, `SYSTEM START FETCHES`, `STOP FETCHES`, `START FETCHES`
+ - `SYSTEM MOVES`. Level: `TABLE`. Aliases: `SYSTEM STOP MOVES`, `SYSTEM START MOVES`, `STOP MOVES`, `START MOVES`
+ - `SYSTEM SENDS`. Level: `GROUP`. Aliases: `SYSTEM STOP SENDS`, `SYSTEM START SENDS`, `STOP SENDS`, `START SENDS`
+ - `SYSTEM DISTRIBUTED SENDS`. Level: `TABLE`. Aliases: `SYSTEM STOP DISTRIBUTED SENDS`, `SYSTEM START DISTRIBUTED SENDS`, `STOP DISTRIBUTED SENDS`, `START DISTRIBUTED SENDS`
+ - `SYSTEM REPLICATED SENDS`. Level: `TABLE`. Aliases: `SYSTEM STOP REPLICATED SENDS`, `SYSTEM START REPLICATED SENDS`, `STOP REPLICATED SENDS`, `START REPLICATED SENDS`
+ - `SYSTEM REPLICATION QUEUES`. Level: `TABLE`. Aliases: `SYSTEM STOP REPLICATION QUEUES`, `SYSTEM START REPLICATION QUEUES`, `STOP REPLICATION QUEUES`, `START REPLICATION QUEUES`
+ - `SYSTEM SYNC REPLICA`. Level: `TABLE`. Aliases: `SYNC REPLICA`
+ - `SYSTEM RESTART REPLICA`. Level: `TABLE`. Aliases: `RESTART REPLICA`
+ - `SYSTEM FLUSH`. Level: `GROUP`
+ - `SYSTEM FLUSH DISTRIBUTED`. Level: `TABLE`. Aliases: `FLUSH DISTRIBUTED`
+ - `SYSTEM FLUSH LOGS`. Level: `GLOBAL`. Aliases: `FLUSH LOGS`
The `SYSTEM RELOAD EMBEDDED DICTIONARIES` privilege is implicitly granted by the `SYSTEM RELOAD DICTIONARY ON *.*` privilege.
@@ -433,36 +445,36 @@ The `SYSTEM RELOAD EMBEDDED DICTIONARIES` privilege implicitly granted by the `S
Allows using [introspection](../../operations/optimizing-performance/sampling-query-profiler.md) functions.
-- `INTROSPECTION`. Level: `GROUP`. Aliases: `INTROSPECTION FUNCTIONS`
- - `addressToLine`. Level: `GLOBAL`
- - `addressToLineWithInlines`. Level: `GLOBAL`
- - `addressToSymbol`. Level: `GLOBAL`
- - `demangle`. Level: `GLOBAL`
+- `INTROSPECTION`. Level: `GROUP`. Aliases: `INTROSPECTION FUNCTIONS`
+ - `addressToLine`. Level: `GLOBAL`
+ - `addressToLineWithInlines`. Level: `GLOBAL`
+ - `addressToSymbol`. Level: `GLOBAL`
+ - `demangle`. Level: `GLOBAL`
### SOURCES
Allows using external data sources. Applies to [table engines](../../engines/table-engines/index.md) and [table functions](../../sql-reference/table-functions/index.md#table-functions).
-- `SOURCES`. Level: `GROUP`
- - `FILE`. Level: `GLOBAL`
- - `URL`. Level: `GLOBAL`
- - `REMOTE`. Level: `GLOBAL`
- - `MYSQL`. Level: `GLOBAL`
- - `ODBC`. Level: `GLOBAL`
- - `JDBC`. Level: `GLOBAL`
- - `HDFS`. Level: `GLOBAL`
- - `S3`. Level: `GLOBAL`
+- `SOURCES`. Level: `GROUP`
+ - `FILE`. Level: `GLOBAL`
+ - `URL`. Level: `GLOBAL`
+ - `REMOTE`. Level: `GLOBAL`
+ - `MYSQL`. Level: `GLOBAL`
+ - `ODBC`. Level: `GLOBAL`
+ - `JDBC`. Level: `GLOBAL`
+ - `HDFS`. Level: `GLOBAL`
+ - `S3`. Level: `GLOBAL`
The `SOURCES` privilege enables use of all the sources. Also, you can grant a privilege for each source individually. To use sources, you need additional privileges.
Examples:
-- To create a table with the [MySQL table engine](../../engines/table-engines/integrations/mysql.md), you need `CREATE TABLE (ON db.table_name)` and `MYSQL` privileges.
-- To use the [mysql table function](../../sql-reference/table-functions/mysql.md), you need `CREATE TEMPORARY TABLE` and `MYSQL` privileges.
+- To create a table with the [MySQL table engine](../../engines/table-engines/integrations/mysql.md), you need `CREATE TABLE (ON db.table_name)` and `MYSQL` privileges.
+- To use the [mysql table function](../../sql-reference/table-functions/mysql.md), you need `CREATE TEMPORARY TABLE` and `MYSQL` privileges.
### dictGet
-- `dictGet`. Aliases: `dictHas`, `dictGetHierarchy`, `dictIsIn`
+- `dictGet`. Aliases: `dictHas`, `dictGetHierarchy`, `dictIsIn`
Allows a user to execute [dictGet](../../sql-reference/functions/ext-dict-functions.md#dictget), [dictHas](../../sql-reference/functions/ext-dict-functions.md#dicthas), [dictGetHierarchy](../../sql-reference/functions/ext-dict-functions.md#dictgethierarchy), [dictIsIn](../../sql-reference/functions/ext-dict-functions.md#dictisin) functions.
@@ -470,8 +482,8 @@ Privilege level: `DICTIONARY`.
**Examples**
-- `GRANT dictGet ON mydb.mydictionary TO john`
-- `GRANT dictGet ON mydictionary TO john`
+- `GRANT dictGet ON mydb.mydictionary TO john`
+- `GRANT dictGet ON mydictionary TO john`
### ALL
diff --git a/docs/en/sql-reference/statements/index.md b/docs/en/sql-reference/statements/index.md
index 100b8889aaa..5aa61cf8d21 100644
--- a/docs/en/sql-reference/statements/index.md
+++ b/docs/en/sql-reference/statements/index.md
@@ -8,25 +8,25 @@ sidebar_label: List of statements
Statements represent various kinds of action you can perform using SQL queries. Each kind of statement has its own syntax and usage details that are described separately:
-- [SELECT](/docs/en/sql-reference/statements/select/index.md)
-- [INSERT INTO](/docs/en/sql-reference/statements/insert-into.md)
-- [CREATE](/docs/en/sql-reference/statements/create/index.md)
-- [ALTER](/docs/en/sql-reference/statements/alter/index.md)
-- [SYSTEM](/docs/en/sql-reference/statements/system.md)
-- [SHOW](/docs/en/sql-reference/statements/show.md)
-- [GRANT](/docs/en/sql-reference/statements/grant.md)
-- [REVOKE](/docs/en/sql-reference/statements/revoke.md)
-- [ATTACH](/docs/en/sql-reference/statements/attach.md)
-- [CHECK TABLE](/docs/en/sql-reference/statements/check-table.md)
-- [DESCRIBE TABLE](/docs/en/sql-reference/statements/describe-table.md)
-- [DETACH](/docs/en/sql-reference/statements/detach.md)
-- [DROP](/docs/en/sql-reference/statements/drop.md)
-- [EXISTS](/docs/en/sql-reference/statements/exists.md)
-- [KILL](/docs/en/sql-reference/statements/kill.md)
-- [OPTIMIZE](/docs/en/sql-reference/statements/optimize.md)
-- [RENAME](/docs/en/sql-reference/statements/rename.md)
-- [SET](/docs/en/sql-reference/statements/set.md)
-- [SET ROLE](/docs/en/sql-reference/statements/set-role.md)
-- [TRUNCATE](/docs/en/sql-reference/statements/truncate.md)
-- [USE](/docs/en/sql-reference/statements/use.md)
-- [EXPLAIN](/docs/en/sql-reference/statements/explain.md)
+- [SELECT](/docs/en/sql-reference/statements/select/index.md)
+- [INSERT INTO](/docs/en/sql-reference/statements/insert-into.md)
+- [CREATE](/docs/en/sql-reference/statements/create/index.md)
+- [ALTER](/docs/en/sql-reference/statements/alter/index.md)
+- [SYSTEM](/docs/en/sql-reference/statements/system.md)
+- [SHOW](/docs/en/sql-reference/statements/show.md)
+- [GRANT](/docs/en/sql-reference/statements/grant.md)
+- [REVOKE](/docs/en/sql-reference/statements/revoke.md)
+- [ATTACH](/docs/en/sql-reference/statements/attach.md)
+- [CHECK TABLE](/docs/en/sql-reference/statements/check-table.md)
+- [DESCRIBE TABLE](/docs/en/sql-reference/statements/describe-table.md)
+- [DETACH](/docs/en/sql-reference/statements/detach.md)
+- [DROP](/docs/en/sql-reference/statements/drop.md)
+- [EXISTS](/docs/en/sql-reference/statements/exists.md)
+- [KILL](/docs/en/sql-reference/statements/kill.md)
+- [OPTIMIZE](/docs/en/sql-reference/statements/optimize.md)
+- [RENAME](/docs/en/sql-reference/statements/rename.md)
+- [SET](/docs/en/sql-reference/statements/set.md)
+- [SET ROLE](/docs/en/sql-reference/statements/set-role.md)
+- [TRUNCATE](/docs/en/sql-reference/statements/truncate.md)
+- [USE](/docs/en/sql-reference/statements/use.md)
+- [EXPLAIN](/docs/en/sql-reference/statements/explain.md)
diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md
index 354ab95c598..d6e30827f9b 100644
--- a/docs/en/sql-reference/statements/insert-into.md
+++ b/docs/en/sql-reference/statements/insert-into.md
@@ -64,8 +64,8 @@ INSERT INTO insert_select_testtable VALUES (1, DEFAULT, 1) ;
If a list of columns does not include all existing columns, the rest of the columns are filled with:
-- The values calculated from the `DEFAULT` expressions specified in the table definition.
-- Zeros and empty strings, if `DEFAULT` expressions are not defined.
+- The values calculated from the `DEFAULT` expressions specified in the table definition.
+- Zeros and empty strings, if `DEFAULT` expressions are not defined.
Data can be passed to the INSERT in any [format](../../interfaces/formats.md#formats) supported by ClickHouse. The format must be specified explicitly in the query:
@@ -208,22 +208,22 @@ Result:
`INSERT` sorts the input data by primary key and splits them into partitions by a partition key. If you insert data into several partitions at once, it can significantly reduce the performance of the `INSERT` query. To avoid this:
-- Add data in fairly large batches, such as 100,000 rows at a time.
-- Group data by a partition key before uploading it to ClickHouse.
+- Add data in fairly large batches, such as 100,000 rows at a time.
+- Group data by a partition key before uploading it to ClickHouse.
Performance will not decrease if:
-- Data is added in real time.
-- You upload data that is usually sorted by time.
+- Data is added in real time.
+- You upload data that is usually sorted by time.
It's also possible to asynchronously insert data in small but frequent inserts. The data from such insertions is combined into batches and then safely inserted into a table. To enable the asynchronous mode, switch on the [async_insert](../../operations/settings/settings.md#async-insert) setting. Note that asynchronous insertions are supported only over HTTP protocol, and deduplication is not supported for them.
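As a minimal sketch of that asynchronous mode (the `events` table is hypothetical, and, per the note above, whether these settings take effect outside the HTTP interface depends on the server version):

```sql
-- Buffer this small insert server-side and return once its batch is flushed:
INSERT INTO events SETTINGS async_insert = 1, wait_for_async_insert = 1
VALUES (1, 'click');
```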
**See Also**
-- [async_insert](../../operations/settings/settings.md#async-insert)
-- [async_insert_threads](../../operations/settings/settings.md#async-insert-threads)
-- [wait_for_async_insert](../../operations/settings/settings.md#wait-for-async-insert)
-- [wait_for_async_insert_timeout](../../operations/settings/settings.md#wait-for-async-insert-timeout)
-- [async_insert_max_data_size](../../operations/settings/settings.md#async-insert-max-data-size)
-- [async_insert_busy_timeout_ms](../../operations/settings/settings.md#async-insert-busy-timeout-ms)
-- [async_insert_stale_timeout_ms](../../operations/settings/settings.md#async-insert-stale-timeout-ms)
+- [async_insert](../../operations/settings/settings.md#async-insert)
+- [async_insert_threads](../../operations/settings/settings.md#async-insert-threads)
+- [wait_for_async_insert](../../operations/settings/settings.md#wait-for-async-insert)
+- [wait_for_async_insert_timeout](../../operations/settings/settings.md#wait-for-async-insert-timeout)
+- [async_insert_max_data_size](../../operations/settings/settings.md#async-insert-max-data-size)
+- [async_insert_busy_timeout_ms](../../operations/settings/settings.md#async-insert-busy-timeout-ms)
+- [async_insert_stale_timeout_ms](../../operations/settings/settings.md#async-insert-stale-timeout-ms)
diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md
index 427ee75cd5f..8a7411b2594 100644
--- a/docs/en/sql-reference/statements/optimize.md
+++ b/docs/en/sql-reference/statements/optimize.md
@@ -21,10 +21,10 @@ The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/me
When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all replicas (if the [alter_sync](../../operations/settings/settings.md#alter-sync) setting is set to `2`) or on the current replica (if the [alter_sync](../../operations/settings/settings.md#alter-sync) setting is set to `1`).
-- If `OPTIMIZE` does not perform a merge for any reason, it does not notify the client. To enable notifications, use the [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) setting.
-- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](alter/partition.md#how-to-set-partition-expression).
-- If you specify `FINAL`, optimization is performed even when all the data is already in one part. You can control this behaviour with [optimize_skip_merged_partitions](../../operations/settings/settings.md#optimize-skip-merged-partitions). Also, the merge is forced even if concurrent merges are performed.
-- If you specify `DEDUPLICATE`, then completely identical rows (unless by-clause is specified) will be deduplicated (all columns are compared), it makes sense only for the MergeTree engine.
+- If `OPTIMIZE` does not perform a merge for any reason, it does not notify the client. To enable notifications, use the [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) setting.
+- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](alter/partition.md#how-to-set-partition-expression).
+- If you specify `FINAL`, optimization is performed even when all the data is already in one part. You can control this behaviour with [optimize_skip_merged_partitions](../../operations/settings/settings.md#optimize-skip-merged-partitions). Also, the merge is forced even if concurrent merges are performed.
+- If you specify `DEDUPLICATE`, then completely identical rows (unless a by-clause is specified) will be deduplicated (all columns are compared); this makes sense only for the MergeTree engine.
You can specify how long (in seconds) to wait for inactive replicas to execute `OPTIMIZE` queries by the [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout) setting.
diff --git a/docs/en/sql-reference/statements/rename.md b/docs/en/sql-reference/statements/rename.md
index cc33a7c41d4..a2d4b15df13 100644
--- a/docs/en/sql-reference/statements/rename.md
+++ b/docs/en/sql-reference/statements/rename.md
@@ -60,4 +60,4 @@ RENAME DICTIONARY [db0.]dict_A TO [db1.]dict_B [,...] [ON CLUSTER cluster]
**See Also**
-- [Dictionaries](../../sql-reference/dictionaries/index.md)
+- [Dictionaries](../../sql-reference/dictionaries/index.md)
diff --git a/docs/en/sql-reference/statements/select/array-join.md b/docs/en/sql-reference/statements/select/array-join.md
index 3d88a0f9b7a..9045ec4aba3 100644
--- a/docs/en/sql-reference/statements/select/array-join.md
+++ b/docs/en/sql-reference/statements/select/array-join.md
@@ -23,8 +23,8 @@ You can specify only one `ARRAY JOIN` clause in a `SELECT` query.
Supported types of `ARRAY JOIN` are listed below:
-- `ARRAY JOIN` - In base case, empty arrays are not included in the result of `JOIN`.
-- `LEFT ARRAY JOIN` - The result of `JOIN` contains rows with empty arrays. The value for an empty array is set to the default value for the array element type (usually 0, empty string or NULL).
+- `ARRAY JOIN` - In the base case, empty arrays are not included in the result of `JOIN`.
+- `LEFT ARRAY JOIN` - The result of `JOIN` contains rows with empty arrays. The value for an empty array is set to the default value for the array element type (usually 0, empty string or NULL).
## Basic ARRAY JOIN Examples
diff --git a/docs/en/sql-reference/statements/select/distinct.md b/docs/en/sql-reference/statements/select/distinct.md
index b2d940af3bb..10326b0ef8f 100644
--- a/docs/en/sql-reference/statements/select/distinct.md
+++ b/docs/en/sql-reference/statements/select/distinct.md
@@ -105,6 +105,6 @@ Take this implementation specificity into account when programming queries.
It is possible to obtain the same result by applying [GROUP BY](../../../sql-reference/statements/select/group-by.md) across the same set of values as specified in the `SELECT` clause, without using any aggregate functions. But there are a few differences from the `GROUP BY` approach:
-- `DISTINCT` can be applied together with `GROUP BY`.
-- When [ORDER BY](../../../sql-reference/statements/select/order-by.md) is omitted and [LIMIT](../../../sql-reference/statements/select/limit.md) is defined, the query stops running immediately after the required number of different rows has been read.
-- Data blocks are output as they are processed, without waiting for the entire query to finish running.
+- `DISTINCT` can be applied together with `GROUP BY`.
+- When [ORDER BY](../../../sql-reference/statements/select/order-by.md) is omitted and [LIMIT](../../../sql-reference/statements/select/limit.md) is defined, the query stops running immediately after the required number of different rows has been read.
+- Data blocks are output as they are processed, without waiting for the entire query to finish running.
diff --git a/docs/en/sql-reference/statements/select/except.md b/docs/en/sql-reference/statements/select/except.md
index f73cbff9819..cc4bb9d1c24 100644
--- a/docs/en/sql-reference/statements/select/except.md
+++ b/docs/en/sql-reference/statements/select/except.md
@@ -149,5 +149,5 @@ Result:
**See Also**
-- [UNION](union.md#union-clause)
-- [INTERSECT](intersect.md#intersect-clause)
+- [UNION](union.md#union-clause)
+- [INTERSECT](intersect.md#intersect-clause)
diff --git a/docs/en/sql-reference/statements/select/from.md b/docs/en/sql-reference/statements/select/from.md
index fb6c1f94902..4ca8e8287c0 100644
--- a/docs/en/sql-reference/statements/select/from.md
+++ b/docs/en/sql-reference/statements/select/from.md
@@ -7,9 +7,9 @@ sidebar_label: FROM
The `FROM` clause specifies the source to read data from:
-- [Table](../../../engines/table-engines/index.md)
-- [Subquery](../../../sql-reference/statements/select/index.md)
-- [Table function](../../../sql-reference/table-functions/index.md#table-functions)
+- [Table](../../../engines/table-engines/index.md)
+- [Subquery](../../../sql-reference/statements/select/index.md)
+- [Table function](../../../sql-reference/table-functions/index.md#table-functions)
[JOIN](../../../sql-reference/statements/select/join.md) and [ARRAY JOIN](../../../sql-reference/statements/select/array-join.md) clauses may also be used to extend the functionality of the `FROM` clause.
@@ -31,8 +31,8 @@ There are drawbacks to using `FINAL` (see below).
Queries that use `FINAL` are executed slightly slower than similar queries that do not, because:
-- Data is merged during query execution.
-- Queries with `FINAL` read primary key columns in addition to the columns specified in the query.
+- Data is merged during query execution.
+- Queries with `FINAL` read primary key columns in addition to the columns specified in the query.
**In most cases, avoid using `FINAL`.** The common approach is to use different queries that assume the background processes of the `MergeTree` engine haven’t happened yet and deal with it by applying aggregation (for example, to discard duplicates).
diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md
index 1018b24f50b..36d401ba04a 100644
--- a/docs/en/sql-reference/statements/select/group-by.md
+++ b/docs/en/sql-reference/statements/select/group-by.md
@@ -7,9 +7,9 @@ sidebar_label: GROUP BY
`GROUP BY` clause switches the `SELECT` query into an aggregation mode, which works as follows:
-- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”.
-- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both.
-- Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually, this significantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct. +- `GROUP BY` clause contains a list of expressions (or a single expression, which is considered to be the list of length one). This list acts as a “grouping key”, while each individual expression will be referred to as a “key expression”. +- All the expressions in the [SELECT](../../../sql-reference/statements/select/index.md), [HAVING](../../../sql-reference/statements/select/having.md), and [ORDER BY](../../../sql-reference/statements/select/order-by.md) clauses **must** be calculated based on key expressions **or** on [aggregate functions](../../../sql-reference/aggregate-functions/index.md) over non-key expressions (including plain columns). In other words, each column selected from the table must be used either in a key expression or inside an aggregate function, but not both. +- Result of aggregating `SELECT` query will contain as many rows as there were unique values of “grouping key” in source table. Usually, this significantly reduces the row count, often by orders of magnitude, but not necessarily: row count stays the same if all “grouping key” values were distinct. When you want to group data in the table by column numbers instead of column names, enable the setting [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). @@ -220,11 +220,11 @@ If the `WITH TOTALS` modifier is specified, another row will be calculated. This This extra row is only produced in `JSON*`, `TabSeparated*`, and `Pretty*` formats, separately from the other rows: -- In `XML` and `JSON*` formats, this row is output as a separate ‘totals’ field. -- In `TabSeparated*`, `CSV*` and `Vertical` formats, the row comes after the main result, preceded by an empty row (after the other data). -- In `Pretty*` formats, the row is output as a separate table after the main result. -- In `Template` format, the row is output according to specified template. -- In the other formats it is not available. +- In `XML` and `JSON*` formats, this row is output as a separate ‘totals’ field. +- In `TabSeparated*`, `CSV*` and `Vertical` formats, the row comes after the main result, preceded by an empty row (after the other data). +- In `Pretty*` formats, the row is output as a separate table after the main result. +- In `Template` format, the row is output according to specified template. +- In the other formats it is not available. :::note totals is output in the results of `SELECT` queries, and is not output in `INSERT INTO ... SELECT`. 
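A minimal illustration of the `WITH TOTALS` behaviour described above (the `hits` table and `domain` column are hypothetical):

```sql
SELECT domain, count() AS c
FROM hits
GROUP BY domain WITH TOTALS
FORMAT JSON
-- The per-domain rows are returned as usual; the aggregate over all rows
-- arrives in the separate 'totals' field of the JSON output.
```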
diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index f65e40dede5..2863c5c0116 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -34,24 +34,24 @@ All clauses are optional, except for the required list of expressions immediatel Specifics of each optional clause are covered in separate sections, which are listed in the same order as they are executed: -- [WITH clause](../../../sql-reference/statements/select/with.md) -- [SELECT clause](#select-clause) -- [DISTINCT clause](../../../sql-reference/statements/select/distinct.md) -- [FROM clause](../../../sql-reference/statements/select/from.md) -- [SAMPLE clause](../../../sql-reference/statements/select/sample.md) -- [JOIN clause](../../../sql-reference/statements/select/join.md) -- [PREWHERE clause](../../../sql-reference/statements/select/prewhere.md) -- [WHERE clause](../../../sql-reference/statements/select/where.md) -- [GROUP BY clause](../../../sql-reference/statements/select/group-by.md) -- [LIMIT BY clause](../../../sql-reference/statements/select/limit-by.md) -- [HAVING clause](../../../sql-reference/statements/select/having.md) -- [LIMIT clause](../../../sql-reference/statements/select/limit.md) -- [OFFSET clause](../../../sql-reference/statements/select/offset.md) -- [UNION clause](../../../sql-reference/statements/select/union.md) -- [INTERSECT clause](../../../sql-reference/statements/select/intersect.md) -- [EXCEPT clause](../../../sql-reference/statements/select/except.md) -- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md) -- [FORMAT clause](../../../sql-reference/statements/select/format.md) +- [WITH clause](../../../sql-reference/statements/select/with.md) +- [SELECT clause](#select-clause) +- [DISTINCT clause](../../../sql-reference/statements/select/distinct.md) +- [FROM clause](../../../sql-reference/statements/select/from.md) +- [SAMPLE clause](../../../sql-reference/statements/select/sample.md) +- [JOIN clause](../../../sql-reference/statements/select/join.md) +- [PREWHERE clause](../../../sql-reference/statements/select/prewhere.md) +- [WHERE clause](../../../sql-reference/statements/select/where.md) +- [GROUP BY clause](../../../sql-reference/statements/select/group-by.md) +- [LIMIT BY clause](../../../sql-reference/statements/select/limit-by.md) +- [HAVING clause](../../../sql-reference/statements/select/having.md) +- [LIMIT clause](../../../sql-reference/statements/select/limit.md) +- [OFFSET clause](../../../sql-reference/statements/select/offset.md) +- [UNION clause](../../../sql-reference/statements/select/union.md) +- [INTERSECT clause](../../../sql-reference/statements/select/intersect.md) +- [EXCEPT clause](../../../sql-reference/statements/select/except.md) +- [INTO OUTFILE clause](../../../sql-reference/statements/select/into-outfile.md) +- [FORMAT clause](../../../sql-reference/statements/select/format.md) ## SELECT Clause @@ -123,11 +123,11 @@ Columns that matched the `COLUMNS` expression can have different data types. If You can put an asterisk in any part of a query instead of an expression. When the query is analyzed, the asterisk is expanded to a list of all table columns (excluding the `MATERIALIZED` and `ALIAS` columns). There are only a few cases when using an asterisk is justified: -- When creating a table dump. -- For tables containing just a few columns, such as system tables. 
-- For getting information about what columns are in a table. In this case, set `LIMIT 1`. But it is better to use the `DESC TABLE` query.
-- When there is strong filtration on a small number of columns using `PREWHERE`.
-- In subqueries (since columns that aren’t needed for the external query are excluded from subqueries).
+- When creating a table dump.
+- For tables containing just a few columns, such as system tables.
+- For getting information about what columns are in a table. In this case, set `LIMIT 1`. But it is better to use the `DESC TABLE` query.
+- When there is strong filtration on a small number of columns using `PREWHERE`.
+- In subqueries (since columns that aren’t needed for the external query are excluded from subqueries).
In all other cases, we do not recommend using the asterisk, since it only gives you the drawbacks of a columnar DBMS instead of the advantages. In other words, using the asterisk is not recommended.
@@ -151,17 +151,17 @@ The `GROUP BY`, `ORDER BY`, and `LIMIT BY` clauses can support positional argume
If the query omits the `DISTINCT`, `GROUP BY` and `ORDER BY` clauses and the `IN` and `JOIN` subqueries, the query will be completely stream processed, using O(1) amount of RAM. Otherwise, the query might consume a lot of RAM if the appropriate restrictions are not specified:
-- `max_memory_usage`
-- `max_rows_to_group_by`
-- `max_rows_to_sort`
-- `max_rows_in_distinct`
-- `max_bytes_in_distinct`
-- `max_rows_in_set`
-- `max_bytes_in_set`
-- `max_rows_in_join`
-- `max_bytes_in_join`
-- `max_bytes_before_external_sort`
-- `max_bytes_before_external_group_by`
+- `max_memory_usage`
+- `max_rows_to_group_by`
+- `max_rows_to_sort`
+- `max_rows_in_distinct`
+- `max_bytes_in_distinct`
+- `max_rows_in_set`
+- `max_bytes_in_set`
+- `max_rows_in_join`
+- `max_bytes_in_join`
+- `max_bytes_before_external_sort`
+- `max_bytes_before_external_group_by`
For more information, see the section “Settings”. It is possible to use external sorting (saving temporary tables to a disk) and external aggregation.
diff --git a/docs/en/sql-reference/statements/select/intersect.md b/docs/en/sql-reference/statements/select/intersect.md
index ea7a39421a5..8c0ee6b2e96 100644
--- a/docs/en/sql-reference/statements/select/intersect.md
+++ b/docs/en/sql-reference/statements/select/intersect.md
@@ -153,5 +153,5 @@ Result:
**See Also**
-- [UNION](union.md#union-clause)
-- [EXCEPT](except.md#except-clause)
+- [UNION](union.md#union-clause)
+- [EXCEPT](except.md#except-clause)
diff --git a/docs/en/sql-reference/statements/select/into-outfile.md b/docs/en/sql-reference/statements/select/into-outfile.md
index a14b23f6689..bd6db9e7d55 100644
--- a/docs/en/sql-reference/statements/select/into-outfile.md
+++ b/docs/en/sql-reference/statements/select/into-outfile.md
@@ -21,10 +21,10 @@ SELECT INTO OUTFILE file_name [AND STDOUT] [COMPRESSION type [LEVEL
## Implementation Details
-- This functionality is available in the [command-line client](../../../interfaces/cli.md) and [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Thus a query sent via [HTTP interface](../../../interfaces/http.md) will fail.
-- The query will fail if a file with the same file name already exists.
-- The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it.
-- If `AND STDOUT` is mentioned in the query then the output that is written to the file is also displayed on standard output. If used with compression, the plaintext is displayed on standard output.
+- This functionality is available in the [command-line client](../../../interfaces/cli.md) and [clickhouse-local](../../../operations/utilities/clickhouse-local.md). Thus a query sent via [HTTP interface](../../../interfaces/http.md) will fail.
+- The query will fail if a file with the same file name already exists.
+- The default [output format](../../../interfaces/formats.md) is `TabSeparated` (like in the command-line client batch mode). Use [FORMAT](format.md) clause to change it.
+- If `AND STDOUT` is mentioned in the query then the output that is written to the file is also displayed on standard output. If used with compression, the plaintext is displayed on standard output.
**Example**
diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md
index ece60961aaf..08ffae838f8 100644
--- a/docs/en/sql-reference/statements/select/join.md
+++ b/docs/en/sql-reference/statements/select/join.md
@@ -18,24 +18,28 @@ FROM
Expressions from `ON` clause and columns from `USING` clause are called “join keys”. Unless otherwise stated, join produces a [Cartesian product](https://en.wikipedia.org/wiki/Cartesian_product) from rows with matching “join keys”, which might produce results with many more rows than the source tables.
+## Related Content
+
+- Blog: [ClickHouse: A Blazingly Fast DBMS with Full SQL Join Support - Part 1](https://clickhouse.com/blog/clickhouse-fully-supports-joins)
+
## Supported Types of JOIN
All standard [SQL JOIN](https://en.wikipedia.org/wiki/Join_(SQL)) types are supported:
-- `INNER JOIN`, only matching rows are returned.
-- `LEFT OUTER JOIN`, non-matching rows from left table are returned in addition to matching rows.
-- `RIGHT OUTER JOIN`, non-matching rows from right table are returned in addition to matching rows.
-- `FULL OUTER JOIN`, non-matching rows from both tables are returned in addition to matching rows.
-- `CROSS JOIN`, produces cartesian product of whole tables, “join keys” are **not** specified.
+- `INNER JOIN`, only matching rows are returned.
+- `LEFT OUTER JOIN`, non-matching rows from left table are returned in addition to matching rows.
+- `RIGHT OUTER JOIN`, non-matching rows from right table are returned in addition to matching rows.
+- `FULL OUTER JOIN`, non-matching rows from both tables are returned in addition to matching rows.
+- `CROSS JOIN`, produces cartesian product of whole tables, “join keys” are **not** specified.
`JOIN` without specified type implies `INNER`. Keyword `OUTER` can be safely omitted. Alternative syntax for `CROSS JOIN` is specifying multiple tables in [FROM clause](../../../sql-reference/statements/select/from.md) separated by commas.
Additional join types available in ClickHouse:
-- `LEFT SEMI JOIN` and `RIGHT SEMI JOIN`, a whitelist on “join keys”, without producing a cartesian product.
-- `LEFT ANTI JOIN` and `RIGHT ANTI JOIN`, a blacklist on “join keys”, without producing a cartesian product.
-- `LEFT ANY JOIN`, `RIGHT ANY JOIN` and `INNER ANY JOIN`, partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types.
-- `ASOF JOIN` and `LEFT ASOF JOIN`, joining sequences with a non-exact match. `ASOF JOIN` usage is described below.
+- `LEFT SEMI JOIN` and `RIGHT SEMI JOIN`, a whitelist on “join keys”, without producing a cartesian product.
+- `LEFT ANTI JOIN` and `RIGHT ANTI JOIN`, a blacklist on “join keys”, without producing a cartesian product. +- `LEFT ANY JOIN`, `RIGHT ANY JOIN` and `INNER ANY JOIN`, partially (for opposite side of `LEFT` and `RIGHT`) or completely (for `INNER` and `FULL`) disables the cartesian product for standard `JOIN` types. +- `ASOF JOIN` and `LEFT ASOF JOIN`, joining sequences with a non-exact match. `ASOF JOIN` usage is described below. :::note When [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) is set to `partial_merge`, `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). @@ -162,9 +166,9 @@ Result: Algorithm requires the special column in tables. This column: -- Must contain an ordered sequence. -- Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md). -- Can’t be the only column in the `JOIN` clause. +- Must contain an ordered sequence. +- Can be one of the following types: [Int, UInt](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md), [Date](../../../sql-reference/data-types/date.md), [DateTime](../../../sql-reference/data-types/datetime.md), [Decimal](../../../sql-reference/data-types/decimal.md). +- Can’t be the only column in the `JOIN` clause. Syntax `ASOF JOIN ... ON`: @@ -194,7 +198,7 @@ For example, consider the following tables: table_1 table_2 event | ev_time | user_id event | ev_time | user_id - ----------|---------|---------- ----------|---------|---------- + ----------|---------|---------- ----------|---------|---------- ... ... event_1_1 | 12:00 | 42 event_2_1 | 11:59 | 42 ... event_2_2 | 12:30 | 42 @@ -211,8 +215,8 @@ For example, consider the following tables: There are two ways to execute join involving distributed tables: -- When using a normal `JOIN`, the query is sent to remote servers. Subqueries are run on each of them in order to make the right table, and the join is performed with this table. In other words, the right table is formed on each server separately. -- When using `GLOBAL ... JOIN`, first the requestor server runs a subquery to calculate the right table. This temporary table is passed to each remote server, and queries are run on them using the temporary data that was transmitted. +- When using a normal `JOIN`, the query is sent to remote servers. Subqueries are run on each of them in order to make the right table, and the join is performed with this table. In other words, the right table is formed on each server separately. +- When using `GLOBAL ... JOIN`, first the requestor server runs a subquery to calculate the right table. This temporary table is passed to each remote server, and queries are run on them using the temporary data that was transmitted. Be careful when using `GLOBAL`. For more information, see the [Distributed subqueries](../../../sql-reference/operators/in.md#select-distributed-subqueries) section. @@ -270,12 +274,12 @@ The `USING` clause specifies one or more columns to join, which establishes the For multiple `JOIN` clauses in a single `SELECT` query: -- Taking all the columns via `*` is available only if tables are joined, not subqueries. -- The `PREWHERE` clause is not available. 
+- Taking all the columns via `*` is available only if tables are joined, not subqueries. +- The `PREWHERE` clause is not available. For `ON`, `WHERE`, and `GROUP BY` clauses: -- Arbitrary expressions cannot be used in `ON`, `WHERE`, and `GROUP BY` clauses, but you can define an expression in a `SELECT` clause and then use it in these clauses via an alias. +- Arbitrary expressions cannot be used in `ON`, `WHERE`, and `GROUP BY` clauses, but you can define an expression in a `SELECT` clause and then use it in these clauses via an alias. ### Performance @@ -293,8 +297,8 @@ By default, ClickHouse uses the [hash join](https://en.wikipedia.org/wiki/Hash_j If you need to restrict `JOIN` operation memory consumption use the following settings: -- [max_rows_in_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join) — Limits number of rows in the hash table. -- [max_bytes_in_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join) — Limits size of the hash table. +- [max_rows_in_join](../../../operations/settings/query-complexity.md#settings-max_rows_in_join) — Limits number of rows in the hash table. +- [max_bytes_in_join](../../../operations/settings/query-complexity.md#settings-max_bytes_in_join) — Limits size of the hash table. When any of these limits is reached, ClickHouse acts as the [join_overflow_mode](../../../operations/settings/query-complexity.md#settings-join_overflow_mode) setting instructs. diff --git a/docs/en/sql-reference/statements/select/limit-by.md b/docs/en/sql-reference/statements/select/limit-by.md index 28f3d7e86d7..4cfc56ecbf9 100644 --- a/docs/en/sql-reference/statements/select/limit-by.md +++ b/docs/en/sql-reference/statements/select/limit-by.md @@ -9,8 +9,8 @@ A query with the `LIMIT n BY expressions` clause selects the first `n` rows for ClickHouse supports the following syntax variants: -- `LIMIT [offset_value, ]n BY expressions` -- `LIMIT n OFFSET offset_value BY expressions` +- `LIMIT [offset_value, ]n BY expressions` +- `LIMIT n OFFSET offset_value BY expressions` During query processing, ClickHouse selects data ordered by sorting key. The sorting key is set explicitly using an [ORDER BY](order-by.md#select-order-by) clause or implicitly as a property of the table engine (row order is only guaranteed when using [ORDER BY](order-by.md#select-order-by), otherwise the row blocks will not be ordered due to multi-threading). Then ClickHouse applies `LIMIT n BY expressions` and returns the first `n` rows for each distinct combination of `expressions`. If `OFFSET` is specified, then for each data block that belongs to a distinct combination of `expressions`, ClickHouse skips `offset_value` number of rows from the beginning of the block and returns a maximum of `n` rows as a result. If `offset_value` is bigger than the number of rows in the data block, ClickHouse returns zero rows from the block. diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index e231a1cc72c..62feca9ecf6 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -16,8 +16,8 @@ If the ORDER BY clause is omitted, the order of the rows is also undefined, and There are two approaches to `NaN` and `NULL` sorting order: -- By default or with the `NULLS LAST` modifier: first the values, then `NaN`, then `NULL`. -- With the `NULLS FIRST` modifier: first `NULL`, then `NaN`, then other values. 
+- By default or with the `NULLS LAST` modifier: first the values, then `NaN`, then `NULL`.
+- With the `NULLS FIRST` modifier: first `NULL`, then `NaN`, then other values.
### Example
diff --git a/docs/en/sql-reference/statements/select/sample.md b/docs/en/sql-reference/statements/select/sample.md
index fb44d7c5a44..a0c9257ed94 100644
--- a/docs/en/sql-reference/statements/select/sample.md
+++ b/docs/en/sql-reference/statements/select/sample.md
@@ -11,9 +11,9 @@ When data sampling is enabled, the query is not performed on all the data, but o
Approximated query processing can be useful in the following cases:
-- When you have strict latency requirements (like below 100ms) but you can’t justify the cost of additional hardware resources to meet them.
-- When your raw data is not accurate, so approximation does not noticeably degrade the quality.
-- Business requirements target approximate results (for cost-effectiveness, or to market exact results to premium users).
+- When you have strict latency requirements (like below 100ms) but you can’t justify the cost of additional hardware resources to meet them.
+- When your raw data is not accurate, so approximation does not noticeably degrade the quality.
+- Business requirements target approximate results (for cost-effectiveness, or to market exact results to premium users).
:::note
You can only use sampling with the tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family, and only if the sampling expression was specified during table creation (see [MergeTree engine](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)).
@@ -21,9 +21,9 @@ You can only use sampling with the tables in the [MergeTree](../../../engines/ta
The features of data sampling are listed below:
-- Data sampling is a deterministic mechanism. The result of the same `SELECT .. SAMPLE` query is always the same.
-- Sampling works consistently for different tables. For tables with a single sampling key, a sample with the same coefficient always selects the same subset of possible data. For example, a sample of user IDs takes rows with the same subset of all the possible user IDs from different tables. This means that you can use the sample in subqueries in the [IN](../../../sql-reference/operators/in.md) clause. Also, you can join samples using the [JOIN](../../../sql-reference/statements/select/join.md) clause.
-- Sampling allows reading less data from a disk. Note that you must specify the sampling key correctly. For more information, see [Creating a MergeTree Table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table).
+- Data sampling is a deterministic mechanism. The result of the same `SELECT .. SAMPLE` query is always the same.
+- Sampling works consistently for different tables. For tables with a single sampling key, a sample with the same coefficient always selects the same subset of possible data. For example, a sample of user IDs takes rows with the same subset of all the possible user IDs from different tables. This means that you can use the sample in subqueries in the [IN](../../../sql-reference/operators/in.md) clause. Also, you can join samples using the [JOIN](../../../sql-reference/statements/select/join.md) clause.
+- Sampling allows reading less data from a disk. Note that you must specify the sampling key correctly. For more information, see [Creating a MergeTree Table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table).
For the `SAMPLE` clause the following syntax is supported:
diff --git a/docs/en/sql-reference/statements/select/union.md b/docs/en/sql-reference/statements/select/union.md
index 002aeaa4488..92a4ed1bb20 100644
--- a/docs/en/sql-reference/statements/select/union.md
+++ b/docs/en/sql-reference/statements/select/union.md
@@ -81,8 +81,8 @@ Queries that are parts of `UNION/UNION ALL/UNION DISTINCT` can be run simultaneo
**See Also**
-- [insert_null_as_default](../../../operations/settings/settings.md#insert_null_as_default) setting.
-- [union_default_mode](../../../operations/settings/settings.md#union-default-mode) setting.
+- [insert_null_as_default](../../../operations/settings/settings.md#insert_null_as_default) setting.
+- [union_default_mode](../../../operations/settings/settings.md#union-default-mode) setting.
[Original article](https://clickhouse.com/docs/en/sql-reference/statements/select/union/)
diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index f6d6d51b123..428a04ae030 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -6,13 +6,15 @@ sidebar_label: SHOW
# SHOW Statements
-## SHOW CREATE TABLE
+## SHOW CREATE TABLE | DICTIONARY | VIEW | DATABASE
``` sql
-SHOW CREATE [TEMPORARY] [TABLE|DICTIONARY|VIEW] [db.]table|view [INTO OUTFILE filename] [FORMAT format]
+SHOW [CREATE] [TEMPORARY] TABLE|DICTIONARY|VIEW|DATABASE [db.]table|view [INTO OUTFILE filename] [FORMAT format]
```
-Returns a single `String`-type ‘statement’ column, which contains a single value – the `CREATE` query used for creating the specified object.
+Returns a single column of type String containing the CREATE query used for creating the specified object.
+
+`SHOW TABLE t` and `SHOW DATABASE db` have the same meaning as `SHOW CREATE TABLE|DATABASE t|db`, but `SHOW t` and `SHOW db` are not supported.
Note that if you use this statement to get the `CREATE` query of system tables, you will get a *fake* query, which only declares table structure, but cannot be used to create a table.
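For example (a hypothetical `db.my_table`; per the syntax above, `CREATE` may be omitted):

```sql
SHOW CREATE TABLE db.my_table;
-- Equivalent short forms:
SHOW TABLE db.my_table;
SHOW DATABASE db;
```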
@@ -94,7 +96,7 @@ Result: **See also** -- [CREATE DATABASE](https://clickhouse.com/docs/en/sql-reference/statements/create/database/#query-language-create-database) +- [CREATE DATABASE](https://clickhouse.com/docs/en/sql-reference/statements/create/database/#query-language-create-database) ## SHOW PROCESSLIST @@ -193,8 +195,8 @@ Result: **See also** -- [Create Tables](https://clickhouse.com/docs/en/getting-started/tutorial/#create-tables) -- [SHOW CREATE TABLE](https://clickhouse.com/docs/en/sql-reference/statements/show/#show-create-table) +- [Create Tables](https://clickhouse.com/docs/en/getting-started/tutorial/#create-tables) +- [SHOW CREATE TABLE](https://clickhouse.com/docs/en/sql-reference/statements/show/#show-create-table) ## SHOW COLUMNS @@ -242,7 +244,7 @@ Result: **See also** -- [system.columns](https://clickhouse.com/docs/en/operations/system-tables/columns) +- [system.columns](https://clickhouse.com/docs/en/operations/system-tables/columns) ## SHOW DICTIONARIES @@ -558,7 +560,7 @@ Result: **See Also** -- [system.settings](../../operations/system-tables/settings.md) table +- [system.settings](../../operations/system-tables/settings.md) table ## SHOW ENGINES @@ -570,4 +572,4 @@ Outputs the content of the [system.table_engines](../../operations/system-tables **See Also** -- [system.table_engines](../../operations/system-tables/table_engines.md) table +- [system.table_engines](../../operations/system-tables/table_engines.md) table diff --git a/docs/en/sql-reference/statements/undrop.md b/docs/en/sql-reference/statements/undrop.md new file mode 100644 index 00000000000..40ac1ab4f99 --- /dev/null +++ b/docs/en/sql-reference/statements/undrop.md @@ -0,0 +1,99 @@ +--- +slug: /en/sql-reference/statements/undrop +sidebar_label: UNDROP +--- + +# UNDROP TABLE + +Cancels the dropping of the table. + +Beginning with ClickHouse version 23.3 it is possible to UNDROP a table in an Atomic database +within `database_atomic_delay_before_drop_table_sec` (8 minutes by default) of issuing the DROP TABLE statement. Dropped tables are listed in +a system table called `system.dropped_tables`. + +If you have a materialized view without a `TO` clause associated with the dropped table, then you will also have to UNDROP the inner table of that view. + +:::note +UNDROP TABLE is experimental. To use it add this setting: +```sql +set allow_experimental_undrop_table_query = 1; +``` +::: + +:::tip +Also see [DROP TABLE](/docs/en/sql-reference/statements/drop.md) +::: + +Syntax: + +``` sql +UNDROP TABLE [db.]name [UUID ''] [ON CLUSTER cluster] +``` + +**Example** + +``` sql +set allow_experimental_undrop_table_query = 1; +``` + +```sql +CREATE TABLE undropMe +( + `id` UInt8 +) +ENGINE = MergeTree +ORDER BY id +``` + +```sql +DROP TABLE undropMe +``` +```sql +SELECT * +FROM system.dropped_tables +FORMAT Vertical +``` +```response +Row 1: +────── +index: 0 +database: default +table: undropMe +uuid: aa696a1a-1d70-4e60-a841-4c80827706cc +engine: MergeTree +metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.undropMe.aa696a1a-1d70-4e60-a841-4c80827706cc.sql +table_dropped_time: 2023-04-05 14:12:12 + +1 row in set. Elapsed: 0.001 sec. +``` +```sql +UNDROP TABLE undropMe +``` +```response +Ok. +``` +```sql +SELECT * +FROM system.dropped_tables +FORMAT Vertical +``` +```response +Ok. + +0 rows in set. Elapsed: 0.001 sec. 
+``` +```sql +DESCRIBE TABLE undropMe +FORMAT Vertical +``` +```response +Row 1: +────── +name: id +type: UInt8 +default_type: +default_expression: +comment: +codec_expression: +ttl_expression: +``` diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 63c5042f9e8..f5651c2dcb6 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -14,7 +14,7 @@ The `INSERT` query uses both parsers: INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') ``` -The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#settings-input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#syntax-expressions). +The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#expressions). Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. It allows for avoiding issues with large `INSERT` queries. @@ -31,30 +31,30 @@ There may be any number of space symbols between syntactical constructions (incl ClickHouse supports either SQL-style and C-style comments: -- SQL-style comments start with `--`, `#!` or `# ` and continue to the end of the line, a space after `--` and `#!` can be omitted. -- C-style are from `/*` to `*/`and can be multiline, spaces are not required either. +- SQL-style comments start with `--`, `#!` or `# ` and continue to the end of the line, a space after `--` and `#!` can be omitted. +- C-style are from `/*` to `*/`and can be multiline, spaces are not required either. ## Keywords Keywords are case-insensitive when they correspond to: -- SQL standard. For example, `SELECT`, `select` and `SeLeCt` are all valid. -- Implementation in some popular DBMS (MySQL or Postgres). For example, `DateTime` is the same as `datetime`. +- SQL standard. For example, `SELECT`, `select` and `SeLeCt` are all valid. +- Implementation in some popular DBMS (MySQL or Postgres). For example, `DateTime` is the same as `datetime`. You can check whether a data type name is case-sensitive in the [system.data_type_families](../operations/system-tables/data_type_families.md#system_tables-data_type_families) table. In contrast to standard SQL, all other keywords (including functions names) are **case-sensitive**. -Keywords are not reserved; they are treated as such only in the corresponding context. 
If you use [identifiers](#syntax-identifiers) with the same name as the keywords, enclose them into double-quotes or backticks. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has column with the name `"FROM"`.
+Keywords are not reserved; they are treated as such only in the corresponding context. If you use [identifiers](#identifiers) with the same name as the keywords, enclose them into double-quotes or backticks. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has a column with the name `"FROM"`.

## Identifiers

Identifiers are:

-- Cluster, database, table, partition, and column names.
-- Functions.
-- Data types.
-- [Expression aliases](#syntax-expression_aliases).
+- Cluster, database, table, partition, and column names.
+- Functions.
+- Data types.
+- [Expression aliases](#expression_aliases).

Identifiers can be quoted or non-quoted. The latter is preferred.

@@ -70,10 +70,10 @@ There are numeric, string, compound, and `NULL` literals.

Numeric literal tries to be parsed:

-- First, as a 64-bit signed number, using the [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) function.
-- If unsuccessful, as a 64-bit unsigned number, using the [strtoll](https://en.cppreference.com/w/cpp/string/byte/strtol) function.
-- If unsuccessful, as a floating-point number using the [strtod](https://en.cppreference.com/w/cpp/string/byte/strtof) function.
-- Otherwise, it returns an error.
+- First, as a 64-bit signed number, using the [strtoll](https://en.cppreference.com/w/cpp/string/byte/strtol) function.
+- If unsuccessful, as a 64-bit unsigned number, using the [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) function.
+- If unsuccessful, as a floating-point number, using the [strtod](https://en.cppreference.com/w/cpp/string/byte/strtof) function.
+- Otherwise, it returns an error.

Literal value has the smallest type that the value fits in. For example, 1 is parsed as `UInt8`, but 256 is parsed as `UInt16`. For more information, see [Data types](../sql-reference/data-types/index.md).

@@ -86,8 +86,8 @@ Examples: `1`, `10_000_000`, `0xffff_ffff`, `18446744073709551615`, `0xDEADBEEF`

String literals must be enclosed in single quotes, double quotes are not supported. Escaping works either

-- using a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or
-- using a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning, i.e. will be interpreted literally, if it precedes characters different than the listed ones.
+- using a preceding single quote where the single-quote character `'` (and only this character) can be escaped as `''`, or
+- using a preceding backslash with the following supported escape sequences: `\\`, `\'`, `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`. The backslash loses its special meaning, i.e., it will be interpreted literally if it precedes a character other than those listed.

In string literals, you need to escape at least `'` and `\` using escape codes `\'` (or: `''`) and `\\`.

@@ -108,7 +108,7 @@ Depending on the data format (input or output), `NULL` may have a different repr

There are many nuances to processing `NULL`. For example, if at least one of the arguments of a comparison operation is `NULL`, the result of this operation is also `NULL`.
The same is true for multiplication, addition, and other operations. For more information, read the documentation for each operation. -In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/index.md#operator-is-null) and [IS NOT NULL](../sql-reference/operators/index.md) operators and the related functions `isNull` and `isNotNull`. +In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/index.md#is-null) and [IS NOT NULL](../sql-reference/operators/index.md#is-not-null) operators and the related functions `isNull` and `isNotNull`. ### Heredoc @@ -149,7 +149,7 @@ For example, the following SQL defines parameters named `a`, `b`, `c` and `d` - SET param_a = 13; SET param_b = 'str'; SET param_c = '2022-08-04 18:30:53'; -SET param_d = {'10': [11, 12], '13': [14, 15]}'; +SET param_d = {'10': [11, 12], '13': [14, 15]}; SELECT {a: UInt32}, @@ -166,7 +166,7 @@ Result: If you are using `clickhouse-client`, the parameters are specified as `--param_name=value`. For example, the following parameter has the name `message` and it is retrieved as a `String`: -```sql +```bash clickhouse-client --param_message='hello' --query="SELECT {message: String}" ``` @@ -190,7 +190,7 @@ Query parameters are not general text substitutions which can be used in arbitra ## Functions Function calls are written like an identifier with a list of arguments (possibly empty) in round brackets. In contrast to standard SQL, the brackets are required, even for an empty argument list. Example: `now()`. -There are regular and aggregate functions (see the section “Aggregate functions”). Some aggregate functions can contain two lists of arguments in brackets. Example: `quantile (0.9) (x)`. These aggregate functions are called “parametric” functions, and the arguments in the first list are called “parameters”. The syntax of aggregate functions without parameters is the same as for regular functions. +There are regular and aggregate functions (see the section [Aggregate functions](/docs/en/sql-reference/aggregate-functions/index.md)). Some aggregate functions can contain two lists of arguments in brackets. Example: `quantile (0.9) (x)`. These aggregate functions are called “parametric” functions, and the arguments in the first list are called “parameters”. The syntax of aggregate functions without parameters is the same as for regular functions. ## Operators @@ -199,7 +199,7 @@ For example, the expression `1 + 2 * 3 + 4` is transformed to `plus(plus(1, mult ## Data Types and Database Table Engines -Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an argument list in brackets. For more information, see the sections “Data types,” “Table engines,” and “CREATE”. +Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an argument list in brackets. For more information, see the sections [Data types](/docs/en/sql-reference/data-types/index.md), [Table engines](/docs/en/engines/table-engines/index.md), and [CREATE](/docs/en/sql-reference/statements/create/index.md). ## Expression Aliases @@ -209,19 +209,19 @@ An alias is a user-defined name for expression in a query. expr AS alias ``` -- `AS` — The keyword for defining aliases. You can define the alias for a table name or a column name in a `SELECT` clause without using the `AS` keyword. +- `AS` — The keyword for defining aliases. 
You can define the alias for a table name or a column name in a `SELECT` clause without using the `AS` keyword. - For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. + For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. - In the [CAST](./functions/type-conversion-functions.md#type_conversion_function-cast) function, the `AS` keyword has another meaning. See the description of the function. + In the [CAST](./functions/type-conversion-functions.md#castx-t) function, the `AS` keyword has another meaning. See the description of the function. -- `expr` — Any expression supported by ClickHouse. +- `expr` — Any expression supported by ClickHouse. - For example, `SELECT column_name * 2 AS double FROM some_table`. + For example, `SELECT column_name * 2 AS double FROM some_table`. -- `alias` — Name for `expr`. Aliases should comply with the [identifiers](#syntax-identifiers) syntax. +- `alias` — Name for `expr`. Aliases should comply with the [identifiers](#identifiers) syntax. - For example, `SELECT "table t".column_name FROM table_name AS "table t"`. + For example, `SELECT "table t".column_name FROM table_name AS "table t"`. ### Notes on Usage @@ -254,11 +254,11 @@ Received exception from server (version 18.14.17): Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query. ``` -In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. You can change this default behavior by setting [prefer_column_name_to_alias](../operations/settings/settings.md#prefer_column_name_to_alias) to `1`. +In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. You can change this default behavior by setting [prefer_column_name_to_alias](../operations/settings/settings.md#prefer-column-name-to-alias) to `1`. ## Asterisk -In a `SELECT` query, an asterisk can replace the expression. For more information, see the section “SELECT”. +In a `SELECT` query, an asterisk can replace the expression. For more information, see the section [SELECT](/docs/en/sql-reference/statements/select/index.md#asterisk). ## Expressions diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md index 4904553c39a..904c678750c 100644 --- a/docs/en/sql-reference/table-functions/cluster.md +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -25,7 +25,7 @@ clusterAllReplicas('cluster_name', db, table[, sharding_key]) - `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. - `db.table` or `db`, `table` - Name of a database and a table. -- `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard. +- `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard. 
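+
+For example, a minimal sketch of both functions (assuming a cluster named `default` is defined in the server's `remote_servers` configuration):
+
+```sql
+-- one query per shard:
+SELECT hostName(), * FROM cluster('default', system.one);
+-- one query per replica of every shard:
+SELECT hostName(), * FROM clusterAllReplicas('default', system.one);
+```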
**Returned value**

@@ -47,13 +47,13 @@ Using the `cluster` and `clusterAllReplicas` table functions are less efficient

The `cluster` and `clusterAllReplicas` table functions can be useful in the following cases:

-- Accessing a specific cluster for data comparison, debugging, and testing.
-- Queries to various ClickHouse clusters and replicas for research purposes.
-- Infrequent distributed requests that are made manually.
+- Accessing a specific cluster for data comparison, debugging, and testing.
+- Queries to various ClickHouse clusters and replicas for research purposes.
+- Infrequent distributed requests that are made manually.

Connection settings like `host`, `port`, `user`, `password`, `compression`, `secure` are taken from the `<remote_servers>` config section. See details in [Distributed engine](../../engines/table-engines/special/distributed.md).

**See Also**

-- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
-- [load_balancing](../../operations/settings/settings.md#settings-load_balancing)
+- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
+- [load_balancing](../../operations/settings/settings.md#settings-load_balancing)

diff --git a/docs/en/sql-reference/table-functions/dictionary.md b/docs/en/sql-reference/table-functions/dictionary.md
index ab511843d63..c4bdde4dce2 100644
--- a/docs/en/sql-reference/table-functions/dictionary.md
+++ b/docs/en/sql-reference/table-functions/dictionary.md
@@ -15,7 +15,7 @@ dictionary('dict')

**Arguments**

-- `dict` — A dictionary name. [String](../../sql-reference/data-types/string.md).
+- `dict` — A dictionary name. [String](../../sql-reference/data-types/string.md).

**Returned value**

@@ -56,4 +56,4 @@ Result:

**See Also**

-- [Dictionary engine](../../engines/table-engines/special/dictionary.md#dictionary)
+- [Dictionary engine](../../engines/table-engines/special/dictionary.md#dictionary)

diff --git a/docs/en/sql-reference/table-functions/executable.md b/docs/en/sql-reference/table-functions/executable.md
index 22c74eb8cfa..c6aba61aedb 100644
--- a/docs/en/sql-reference/table-functions/executable.md
+++ b/docs/en/sql-reference/table-functions/executable.md
@@ -20,7 +20,7 @@ A key advantage between ordinary UDF functions and the `executable` table functi

The `executable` table function requires three parameters and accepts an optional list of input queries:

```sql
-executable(script_name, format, structure, [input_query...])
+executable(script_name, format, structure, [input_query...] [,SETTINGS ...])
```

- `script_name`: the file name of the script. saved in the `user_scripts` folder (the default folder of the `user_scripts_path` setting)

@@ -83,6 +83,15 @@ The response looks like:
└────┴────────────┘
```

+## Settings
+
+- `send_chunk_header` - controls whether to send a row count before sending a chunk of data to process. Default value is `false`.
+- `pool_size` — Size of the pool. If `0` is specified, there are no pool size restrictions. Default value is `16`.
+- `max_command_execution_time` — Maximum time for the executable script to process a block of data. Specified in seconds. Default value is 10.
+- `command_termination_timeout` — The executable script should contain a main read-write loop. After the table function is destroyed, the pipe is closed, and the script has `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is 10.
+- `command_read_timeout` - Timeout for reading data from the command's stdout, in milliseconds. Default value is 10000.
+- `command_write_timeout` - Timeout for writing data to the command's stdin, in milliseconds. Default value is 10000.
+
## Passing Query Results to a Script

Be sure to check out the example in the `Executable` table engine on [how to pass query results to a script](../../engines/table-engines/special/executable.md#passing-query-results-to-a-script). Here is how you execute the same script in that example using the `executable` table function:

@@ -94,4 +103,4 @@ SELECT * FROM executable(
    'id UInt64, sentiment Float32',
    (SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20)
);
-```
\ No newline at end of file
+```

diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md
index 48c2cadc62c..28c2dc9f1f3 100644
--- a/docs/en/sql-reference/table-functions/file.md
+++ b/docs/en/sql-reference/table-functions/file.md
@@ -6,9 +6,9 @@ sidebar_label: file

# file

-Creates a table from a file. This table function is similar to [url](/docs/en/sql-reference/table-functions/url.md) and [hdfs](/docs/en/sql-reference/table-functions/hdfs.md) ones.
+Provides a table-like interface to SELECT from and INSERT to files. This table function is similar to the [s3](/docs/en/sql-reference/table-functions/s3.md) table function. Use file() when working with local files, and s3() when working with buckets in S3, GCS, or MinIO.

-`file` function can be used in `SELECT` and `INSERT` queries on data in [File](/docs/en/engines/table-engines/special/file.md) tables.
+The `file` function can be used in `SELECT` and `INSERT` queries to read from or write to files.

**Syntax**

``` sql
file(path [,format] [,structure] [,compression])
```

**Parameters**

-- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
-- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
-- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
-- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
+- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). The path supports the following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
+- `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file.
+- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
+- `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.

**Returned value**

A table with the specified structure for reading or writing data in the specified file.
-**Examples** +## File Write Examples + +### Write to a TSV file + +```sql +INSERT INTO TABLE FUNCTION +file('test.tsv', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +VALUES (1, 2, 3), (3, 2, 1), (1, 3, 2) +``` + +As a result, the data is written into the file `test.tsv`: + +```bash +# cat /var/lib/clickhouse/user_files/test.tsv +1 2 3 +3 2 1 +1 3 2 +``` + +### Partitioned Write to multiple TSV files + +If you specify `PARTITION BY` expression when inserting data into a file() function, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. + +```sql +INSERT INTO TABLE FUNCTION +file('test_{_partition_id}.tsv', 'TSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +PARTITION BY column3 +VALUES (1, 2, 3), (3, 2, 1), (1, 3, 2) +``` + +As a result, the data is written into three files: `test_1.tsv`, `test_2.tsv`, and `test_3.tsv`. + +```bash +# cat /var/lib/clickhouse/user_files/test_1.tsv +3 2 1 + +# cat /var/lib/clickhouse/user_files/test_2.tsv +1 3 2 + +# cat /var/lib/clickhouse/user_files/test_3.tsv +1 2 3 +``` + +## File Read Examples + +### SELECT from a CSV file Setting `user_files_path` and the contents of the file `test.csv`: @@ -44,7 +89,9 @@ $ cat /var/lib/clickhouse/user_files/test.csv Getting data from a table in `test.csv` and selecting the first two rows from it: ``` sql -SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 2; +SELECT * FROM +file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +LIMIT 2; ``` ``` text @@ -57,14 +104,21 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U Getting the first 10 lines of a table that contains 3 columns of [UInt32](/docs/en/sql-reference/data-types/int-uint.md) type from a CSV file: ``` sql -SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10; +SELECT * FROM +file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +LIMIT 10; ``` -Inserting data from a file into a table: +### Inserting data from a file into a table: ``` sql -INSERT INTO FUNCTION file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') VALUES (1, 2, 3), (3, 2, 1); -SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32'); +INSERT INTO FUNCTION +file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +VALUES (1, 2, 3), (3, 2, 1); +``` +```sql +SELECT * FROM +file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32'); ``` ``` text @@ -78,11 +132,11 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U Multiple path components can have globs. For being processed file must exist and match to the whole path pattern (not only suffix or prefix). -- `*` — Substitutes any number of any characters except `/` including empty string. -- `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. -- `{N..M}` — Substitutes any number in range from N to M including both borders. -- `**` - Fetches all files inside the folder recursively. +- `*` — Substitutes any number of any characters except `/` including empty string. +- `?` — Substitutes any single character. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. 
+- `{N..M}` — Substitutes any number in range from N to M including both borders. +- `**` - Fetches all files inside the folder recursively. Constructions with `{}` are similar to the [remote](remote.md) table function. @@ -90,12 +144,12 @@ Constructions with `{}` are similar to the [remote](remote.md) table function. Suppose we have several files with the following relative paths: -- 'some_dir/some_file_1' -- 'some_dir/some_file_2' -- 'some_dir/some_file_3' -- 'another_dir/some_file_1' -- 'another_dir/some_file_2' -- 'another_dir/some_file_3' +- 'some_dir/some_file_1' +- 'some_dir/some_file_2' +- 'some_dir/some_file_3' +- 'another_dir/some_file_1' +- 'another_dir/some_file_2' +- 'another_dir/some_file_3' Query the number of rows in these files: @@ -139,9 +193,9 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3 ## Virtual Columns -- `_path` — Path to the file. -- `_file` — Name of the file. +- `_path` — Path to the file. +- `_file` — Name of the file. **See Also** -- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns) +- [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns) diff --git a/docs/en/sql-reference/table-functions/format.md b/docs/en/sql-reference/table-functions/format.md index 811eae12942..2813eef5bcf 100644 --- a/docs/en/sql-reference/table-functions/format.md +++ b/docs/en/sql-reference/table-functions/format.md @@ -16,9 +16,9 @@ format(format_name, [structure], data) **Parameters** -- `format_name` — The [format](../../interfaces/formats.md#formats) of the data. -- `structure` - Structure of the table. Optional. Format 'column1_name column1_type, column2_name column2_type, ...'. -- `data` — String literal or constant expression that returns a string containing data in specified format +- `format_name` — The [format](../../interfaces/formats.md#formats) of the data. +- `structure` - Structure of the table. Optional. Format 'column1_name column1_type, column2_name column2_type, ...'. +- `data` — String literal or constant expression that returns a string containing data in specified format **Returned value** @@ -95,4 +95,4 @@ $$) **See Also** -- [Formats](../../interfaces/formats.md) +- [Formats](../../interfaces/formats.md) diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md index b53ccdd42b5..bfc114daa72 100644 --- a/docs/en/sql-reference/table-functions/generate.md +++ b/docs/en/sql-reference/table-functions/generate.md @@ -16,11 +16,11 @@ generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_stri **Arguments** -- `name` — Name of corresponding column. -- `TypeName` — Type of corresponding column. -- `max_array_length` — Maximum elements for all generated arrays or maps. Defaults to `10`. -- `max_string_length` — Maximum string length for all generated strings. Defaults to `10`. -- `random_seed` — Specify random seed manually to produce stable results. If NULL — seed is randomly generated. +- `name` — Name of corresponding column. +- `TypeName` — Type of corresponding column. +- `max_array_length` — Maximum elements for all generated arrays or maps. Defaults to `10`. +- `max_string_length` — Maximum string length for all generated strings. Defaults to `10`. +- `random_seed` — Specify random seed manually to produce stable results. If NULL — seed is randomly generated. 
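+
+For example, a small sketch (no existing tables are assumed; the seed makes the output reproducible):
+
+```sql
+-- arguments after the structure: random_seed, max_string_length, max_array_length
+SELECT * FROM generateRandom('id UInt32, name String, tags Array(String)', 42, 5, 3) LIMIT 3;
+```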
**Returned Value** diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 1b4588a9b55..6ba24211131 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -14,9 +14,9 @@ hdfs(URI, format, structure) **Input parameters** -- `URI` — The relative URI to the file in HDFS. Path to file support following globs in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, \``'abc', 'def'` — strings. -- `format` — The [format](../../interfaces/formats.md#formats) of the file. -- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `URI` — The relative URI to the file in HDFS. Path to file support following globs in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, \``'abc', 'def'` — strings. +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. **Returned value** @@ -43,10 +43,10 @@ LIMIT 2 Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix). -- `*` — Substitutes any number of any characters except `/` including empty string. -- `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. -- `{N..M}` — Substitutes any number in range from N to M including both borders. +- `*` — Substitutes any number of any characters except `/` including empty string. +- `?` — Substitutes any single character. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{N..M}` — Substitutes any number in range from N to M including both borders. Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)). @@ -54,12 +54,12 @@ Constructions with `{}` are similar to the [remote table function](../../sql-ref 1. Suppose that we have several files with following URIs on HDFS: -- ‘hdfs://hdfs1:9000/some_dir/some_file_1’ -- ‘hdfs://hdfs1:9000/some_dir/some_file_2’ -- ‘hdfs://hdfs1:9000/some_dir/some_file_3’ -- ‘hdfs://hdfs1:9000/another_dir/some_file_1’ -- ‘hdfs://hdfs1:9000/another_dir/some_file_2’ -- ‘hdfs://hdfs1:9000/another_dir/some_file_3’ +- ‘hdfs://hdfs1:9000/some_dir/some_file_1’ +- ‘hdfs://hdfs1:9000/some_dir/some_file_2’ +- ‘hdfs://hdfs1:9000/some_dir/some_file_3’ +- ‘hdfs://hdfs1:9000/another_dir/some_file_1’ +- ‘hdfs://hdfs1:9000/another_dir/some_file_2’ +- ‘hdfs://hdfs1:9000/another_dir/some_file_3’ 2. Query the amount of rows in these files: @@ -94,9 +94,9 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin ## Virtual Columns -- `_path` — Path to the file. -- `_file` — Name of the file. +- `_path` — Path to the file. +- `_file` — Name of the file. 
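+
+For example, a sketch that uses both virtual columns to see which file each row came from (assuming the hypothetical URIs from the examples above):
+
+```sql
+SELECT _path, _file, count() AS rows
+FROM hdfs('hdfs://hdfs1:9000/some_dir/*', 'CSV', 'name String, value UInt32')
+GROUP BY _path, _file;
+```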
**See Also** -- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns) +- [Virtual columns](../../engines/table-engines/index.md#table_engines-virtual_columns) diff --git a/docs/en/sql-reference/table-functions/hdfsCluster.md b/docs/en/sql-reference/table-functions/hdfsCluster.md index 546c6a3d1b1..fa17a01accf 100644 --- a/docs/en/sql-reference/table-functions/hdfsCluster.md +++ b/docs/en/sql-reference/table-functions/hdfsCluster.md @@ -16,10 +16,10 @@ hdfsCluster(cluster_name, URI, format, structure) **Arguments** -- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `URI` — URI to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `format` — The [format](../../interfaces/formats.md#formats) of the file. -- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `URI` — URI to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. **Returned value** @@ -29,12 +29,12 @@ A table with the specified structure for reading data in the specified file. 1. Suppose that we have a ClickHouse cluster named `cluster_simple`, and several files with following URIs on HDFS: -- ‘hdfs://hdfs1:9000/some_dir/some_file_1’ -- ‘hdfs://hdfs1:9000/some_dir/some_file_2’ -- ‘hdfs://hdfs1:9000/some_dir/some_file_3’ -- ‘hdfs://hdfs1:9000/another_dir/some_file_1’ -- ‘hdfs://hdfs1:9000/another_dir/some_file_2’ -- ‘hdfs://hdfs1:9000/another_dir/some_file_3’ +- ‘hdfs://hdfs1:9000/some_dir/some_file_1’ +- ‘hdfs://hdfs1:9000/some_dir/some_file_2’ +- ‘hdfs://hdfs1:9000/some_dir/some_file_3’ +- ‘hdfs://hdfs1:9000/another_dir/some_file_1’ +- ‘hdfs://hdfs1:9000/another_dir/some_file_2’ +- ‘hdfs://hdfs1:9000/another_dir/some_file_3’ 2. Query the amount of rows in these files: @@ -56,5 +56,5 @@ If your listing of files contains number ranges with leading zeros, use the cons **See Also** -- [HDFS engine](../../engines/table-engines/integrations/hdfs.md) -- [HDFS table function](../../sql-reference/table-functions/hdfs.md) +- [HDFS engine](../../engines/table-engines/integrations/hdfs.md) +- [HDFS table function](../../sql-reference/table-functions/hdfs.md) diff --git a/docs/en/sql-reference/table-functions/index.md b/docs/en/sql-reference/table-functions/index.md index 1010d53e86d..b16295db36a 100644 --- a/docs/en/sql-reference/table-functions/index.md +++ b/docs/en/sql-reference/table-functions/index.md @@ -10,15 +10,15 @@ Table functions are methods for constructing tables. You can use table functions in: -- [FROM](../../sql-reference/statements/select/from.md) clause of the `SELECT` query. +- [FROM](../../sql-reference/statements/select/from.md) clause of the `SELECT` query. 
The method for creating a temporary table that is available only in the current query. The table is deleted when the query finishes. -- [CREATE TABLE AS table_function()](../../sql-reference/statements/create/table.md) query. +- [CREATE TABLE AS table_function()](../../sql-reference/statements/create/table.md) query. It's one of the methods of creating a table. -- [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function) query. +- [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function) query. :::note You can’t use table functions if the [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) setting is disabled. diff --git a/docs/en/sql-reference/table-functions/input.md b/docs/en/sql-reference/table-functions/input.md index b07bc1bb431..6aa1cab00c1 100644 --- a/docs/en/sql-reference/table-functions/input.md +++ b/docs/en/sql-reference/table-functions/input.md @@ -24,7 +24,7 @@ with all transferred data is not created. **Examples** -- Let the `test` table has the following structure `(a String, b String)` +- Let the `test` table has the following structure `(a String, b String)` and data in `data.csv` has a different structure `(col1 String, col2 Date, col3 Int32)`. Query for insert data from the `data.csv` into the `test` table with simultaneous conversion looks like this: @@ -34,7 +34,7 @@ with all transferred data is not created. $ cat data.csv | clickhouse-client --query="INSERT INTO test SELECT lower(col1), col3 * col3 FROM input('col1 String, col2 Date, col3 Int32') FORMAT CSV"; ``` -- If `data.csv` contains data of the same structure `test_structure` as the table `test` then these two queries are equal: +- If `data.csv` contains data of the same structure `test_structure` as the table `test` then these two queries are equal: diff --git a/docs/en/sql-reference/table-functions/merge.md b/docs/en/sql-reference/table-functions/merge.md index ce3cdded3f2..ba0d19b804e 100644 --- a/docs/en/sql-reference/table-functions/merge.md +++ b/docs/en/sql-reference/table-functions/merge.md @@ -24,4 +24,4 @@ merge('db_name', 'tables_regexp') **See Also** -- [Merge](../../engines/table-engines/special/merge.md) table engine +- [Merge](../../engines/table-engines/special/merge.md) table engine diff --git a/docs/en/sql-reference/table-functions/mongodb.md b/docs/en/sql-reference/table-functions/mongodb.md index 706ab68fee4..042225dd1f0 100644 --- a/docs/en/sql-reference/table-functions/mongodb.md +++ b/docs/en/sql-reference/table-functions/mongodb.md @@ -16,19 +16,19 @@ mongodb(host:port, database, collection, user, password, structure [, options]) **Arguments** -- `host:port` — MongoDB server address. +- `host:port` — MongoDB server address. -- `database` — Remote database name. +- `database` — Remote database name. -- `collection` — Remote collection name. +- `collection` — Remote collection name. -- `user` — MongoDB user. +- `user` — MongoDB user. -- `password` — User password. +- `password` — User password. -- `structure` - The schema for the ClickHouse table returned from this function. +- `structure` - The schema for the ClickHouse table returned from this function. -- `options` - MongoDB connection string options (optional parameter). +- `options` - MongoDB connection string options (optional parameter). 
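+
+For example, a minimal sketch (the host, credentials, collection, and structure below are placeholder assumptions, not values from this document):
+
+```sql
+SELECT * FROM mongodb(
+    '127.0.0.1:27017',
+    'test',
+    'my_collection',
+    'test_user',
+    'password',
+    'log_type String, host String, command String',
+    'connectTimeoutMS=10000'
+);
+```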
**Returned Value** @@ -70,5 +70,5 @@ SELECT * FROM mongodb( **See Also** -- [The `MongoDB` table engine](/docs/en/engines/table-engines/integrations/mongodb.md) -- [Using MongoDB as a dictionary source](/docs/en/sql-reference/dictionaries/index.md#mongodb) +- [The `MongoDB` table engine](/docs/en/engines/table-engines/integrations/mongodb.md) +- [Using MongoDB as a dictionary source](/docs/en/sql-reference/dictionaries/index.md#mongodb) diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md index 64ddcd86f7f..8d7656365f5 100644 --- a/docs/en/sql-reference/table-functions/mysql.md +++ b/docs/en/sql-reference/table-functions/mysql.md @@ -16,21 +16,21 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_ **Arguments** -- `host:port` — MySQL server address. +- `host:port` — MySQL server address. -- `database` — Remote database name. +- `database` — Remote database name. -- `table` — Remote table name. +- `table` — Remote table name. -- `user` — MySQL user. +- `user` — MySQL user. -- `password` — User password. +- `password` — User password. -- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. Possible values: +- `replace_query` — Flag that converts `INSERT INTO` queries to `REPLACE INTO`. Possible values: - `0` - The query is executed as `INSERT INTO`. - `1` - The query is executed as `REPLACE INTO`. -- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query. Can be specified only with `replace_query = 0` (if you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception). +- `on_duplicate_clause` — The `ON DUPLICATE KEY on_duplicate_clause` expression that is added to the `INSERT` query. Can be specified only with `replace_query = 0` (if you simultaneously pass `replace_query = 1` and `on_duplicate_clause`, ClickHouse generates an exception). Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1;` @@ -109,5 +109,5 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123'); **See Also** -- [The ‘MySQL’ table engine](../../engines/table-engines/integrations/mysql.md) -- [Using MySQL as a dictionary source](../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-mysql) +- [The ‘MySQL’ table engine](../../engines/table-engines/integrations/mysql.md) +- [Using MySQL as a dictionary source](../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-mysql) diff --git a/docs/en/sql-reference/table-functions/null.md b/docs/en/sql-reference/table-functions/null.md index 04d7f08f259..d27295f1916 100644 --- a/docs/en/sql-reference/table-functions/null.md +++ b/docs/en/sql-reference/table-functions/null.md @@ -15,7 +15,7 @@ null('structure') **Parameter** -- `structure` — A list of columns and column types. [String](../../sql-reference/data-types/string.md). +- `structure` — A list of columns and column types. [String](../../sql-reference/data-types/string.md). 
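+
+A typical use, sketched below, is benchmarking an `INSERT SELECT` pipeline without storing the result (assumes nothing beyond the built-in `numbers` table function):
+
+```sql
+-- the rows are generated, formatted, and then discarded
+INSERT INTO FUNCTION null('x UInt64') SELECT number AS x FROM numbers(1000000);
+```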
**Returned value** @@ -38,4 +38,4 @@ DROP TABLE IF EXISTS t; See also: -- [Null table engine](../../engines/table-engines/special/null.md) +- [Null table engine](../../engines/table-engines/special/null.md) diff --git a/docs/en/sql-reference/table-functions/odbc.md b/docs/en/sql-reference/table-functions/odbc.md index 397a9ba6c89..781ebacc680 100644 --- a/docs/en/sql-reference/table-functions/odbc.md +++ b/docs/en/sql-reference/table-functions/odbc.md @@ -14,9 +14,9 @@ odbc(connection_settings, external_database, external_table) Parameters: -- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file. -- `external_database` — Name of a database in an external DBMS. -- `external_table` — Name of a table in the `external_database`. +- `connection_settings` — Name of the section with connection settings in the `odbc.ini` file. +- `external_database` — Name of a database in an external DBMS. +- `external_table` — Name of a table in the `external_database`. To safely implement ODBC connections, ClickHouse uses a separate program `clickhouse-odbc-bridge`. If the ODBC driver is loaded directly from `clickhouse-server`, driver problems can crash the ClickHouse server. ClickHouse automatically starts `clickhouse-odbc-bridge` when it is required. The ODBC bridge program is installed from the same package as the `clickhouse-server`. @@ -101,5 +101,5 @@ SELECT * FROM odbc('DSN=mysqlconn', 'test', 'test') ## See Also -- [ODBC dictionaries](../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-odbc) -- [ODBC table engine](../../engines/table-engines/integrations/odbc.md). +- [ODBC dictionaries](../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-odbc) +- [ODBC table engine](../../engines/table-engines/integrations/odbc.md). diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md index 6cd13acaa77..3e147fb8417 100644 --- a/docs/en/sql-reference/table-functions/postgresql.md +++ b/docs/en/sql-reference/table-functions/postgresql.md @@ -16,12 +16,12 @@ postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`]) **Arguments** -- `host:port` — PostgreSQL server address. -- `database` — Remote database name. -- `table` — Remote table name. -- `user` — PostgreSQL user. -- `password` — User password. -- `schema` — Non-default table schema. Optional. +- `host:port` — PostgreSQL server address. +- `database` — Remote database name. +- `table` — Remote table name. +- `user` — PostgreSQL user. +- `password` — User password. +- `schema` — Non-default table schema. Optional. 
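+
+For example, a minimal sketch (the server address, credentials, and table name are placeholder assumptions):
+
+```sql
+SELECT * FROM postgresql('localhost:5432', 'test', 'test_table', 'postgres_user', 'password');
+```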
**Returned Value** @@ -129,8 +129,10 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32) **See Also** -- [The PostgreSQL table engine](../../engines/table-engines/integrations/postgresql.md) -- [Using PostgreSQL as a dictionary source](../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-postgresql) +- [The PostgreSQL table engine](../../engines/table-engines/integrations/postgresql.md) +- [Using PostgreSQL as a dictionary source](../../sql-reference/dictionaries/index.md#dictionary-sources#dicts-external_dicts_dict_sources-postgresql) ## Related content + - Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres) +- Blog: [ClickHouse and PostgreSQL - a Match Made in Data Heaven - part 2](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres-part-2) diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 12894abb3ff..a9ddc286ec5 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -23,7 +23,7 @@ For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_ **Arguments** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). :::note GCS The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: @@ -33,10 +33,10 @@ For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_ and not ~~https://storage.cloud.google.com~~. ::: -- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed. -- `format` — The [format](../../interfaces/formats.md#formats) of the file. -- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. +- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed. +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. **Returned value** @@ -84,14 +84,14 @@ ClickHouse also can determine the compression of the file. 
For example, if the f Suppose that we have several files with following URIs on S3: -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_3.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_4.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_3.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_4.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_4.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_4.csv' Count the amount of rows in files ending with numbers from 1 to 3: @@ -204,4 +204,4 @@ LIMIT 5; **See Also** -- [S3 engine](../../engines/table-engines/integrations/s3.md) +- [S3 engine](../../engines/table-engines/integrations/s3.md) diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index 504f92b4dc0..7ac6773672c 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -15,11 +15,11 @@ s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [, **Arguments** -- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. -- `format` — The [format](../../interfaces/formats.md#formats) of the file. -- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). 
+- `access_key_id` and `secret_access_key` — Keys that specify credentials to use with the given endpoint. Optional.
+- `format` — The [format](../../interfaces/formats.md#formats) of the file.
+- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`.

**Returned value**

@@ -48,5 +48,5 @@ If your listing of files contains number ranges with leading zeros, use the cons

**See Also**

-- [S3 engine](../../engines/table-engines/integrations/s3.md)
-- [s3 table function](../../sql-reference/table-functions/s3.md)
+- [S3 engine](../../engines/table-engines/integrations/s3.md)
+- [s3 table function](../../sql-reference/table-functions/s3.md)

diff --git a/docs/en/sql-reference/table-functions/sqlite.md b/docs/en/sql-reference/table-functions/sqlite.md
index 1895f32421e..344fab4fad2 100644
--- a/docs/en/sql-reference/table-functions/sqlite.md
+++ b/docs/en/sql-reference/table-functions/sqlite.md
@@ -15,12 +15,12 @@ Allows to perform queries on a data stored in an [SQLite](../../engines/database

**Arguments**

-- `db_path` — Path to a file with an SQLite database. [String](../../sql-reference/data-types/string.md).
-- `table_name` — Name of a table in the SQLite database. [String](../../sql-reference/data-types/string.md).
+- `db_path` — Path to a file with an SQLite database. [String](../../sql-reference/data-types/string.md).
+- `table_name` — Name of a table in the SQLite database. [String](../../sql-reference/data-types/string.md).

**Returned value**

-- A table object with the same columns as in the original `SQLite` table.
+- A table object with the same columns as in the original `SQLite` table.

**Example**

@@ -42,4 +42,4 @@ Result:

**See Also**

-- [SQLite](../../engines/table-engines/integrations/sqlite.md) table engine
+- [SQLite](../../engines/table-engines/integrations/sqlite.md) table engine

diff --git a/docs/en/sql-reference/table-functions/view.md b/docs/en/sql-reference/table-functions/view.md
index 074481cc522..2c21fe9ff4b 100644
--- a/docs/en/sql-reference/table-functions/view.md
+++ b/docs/en/sql-reference/table-functions/view.md
@@ -15,11 +15,11 @@ view(subquery)

**Arguments**

-- `subquery` — `SELECT` query.
+- `subquery` — `SELECT` query.

**Returned value**

-- A table.
+- A table.

**Example**

@@ -63,4 +63,4 @@ SELECT * FROM cluster(`cluster_name`, view(SELECT a, b, c FROM table_name));

**See Also**

-- [View Table Engine](https://clickhouse.com/docs/en/engines/table-engines/special/view/)
+- [View Table Engine](https://clickhouse.com/docs/en/engines/table-engines/special/view/)

diff --git a/docs/en/sql-reference/transactions.md b/docs/en/sql-reference/transactions.md
new file mode 100644
index 00000000000..68fbfe0b22a
--- /dev/null
+++ b/docs/en/sql-reference/transactions.md
@@ -0,0 +1,260 @@
+---
+slug: /en/guides/developer/transactional
+---
+# Transactional (ACID) support
+
+INSERT into one partition* in one table* of the MergeTree* family, up to max_insert_block_size rows*, is transactional (ACID):
+- Atomic: an INSERT either succeeds or is rejected as a whole: if a confirmation is sent to the client, all rows were inserted; if an error is sent to the client, no rows were inserted.
+- Consistent: if no table constraints are violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted.
+- Isolated: concurrent clients observe a consistent snapshot of the table: the state of the table is either as before the INSERT or as after the successful INSERT; no partial state is seen.
+- Durable: a successful INSERT is written to the filesystem before answering the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
+* If the table has many partitions and the INSERT covers many of them, then insertion into each partition is transactional on its own;
+* INSERT into multiple tables with one statement is possible if materialized views are involved;
+* INSERT into a Distributed table is not transactional as a whole, although insertion into each shard is;
+* as another example, INSERT into a Buffer table is neither atomic, nor isolated, consistent, or durable;
+* atomicity is ensured even if `async_insert` is enabled, but it can be turned off via the wait_for_async_insert setting;
+* max_insert_block_size is 1 000 000 by default and can be adjusted as needed;
+* if the client did not receive an answer from the server, it does not know whether the transaction succeeded, and it can repeat the transaction, relying on exactly-once insertion properties;
+* ClickHouse uses MVCC with snapshot isolation internally;
+* all ACID properties are valid even in the case of a server kill or crash;
+* in a typical setup, either insert_quorum into different AZs or fsync should be enabled to ensure durable inserts;
+* "consistency" in ACID terms does not cover the semantics of distributed systems (see https://jepsen.io/consistency); that is controlled by different settings (select_sequential_consistency);
+* this explanation does not cover the new transactions feature that allows full-featured transactions over multiple tables, materialized views, multiple SELECTs, etc.
+
+## Transactions, Commit, and Rollback
+
+In addition to the functionality described at the top of this document, ClickHouse has experimental support for transactions, commits, and rollback functionality.
+
+### Requirements
+
+- Deploy ClickHouse Keeper or ZooKeeper to track transactions
+- Atomic database engine only (the default)
+- Non-Replicated MergeTree table engine only
+- Enable experimental transaction support by adding this setting in `config.d/transactions.xml`:
+  ```xml
+  <clickhouse>
+    <allow_experimental_transactions>1</allow_experimental_transactions>
+  </clickhouse>
+  ```
+
+### Notes
+- This is an experimental feature, and changes should be expected.
+- If an exception occurs during a transaction, you cannot commit the transaction. This includes all exceptions, including `UNKNOWN_FUNCTION` exceptions caused by typos.
+- Nested transactions are not supported; finish the current transaction and start a new one instead.
+
+### Configuration
+
+These examples are for a single-node ClickHouse server with ClickHouse Keeper enabled.
+
+#### Enable experimental transaction support
+
+```xml title=/etc/clickhouse-server/config.d/transactions.xml
+<clickhouse>
+    <allow_experimental_transactions>1</allow_experimental_transactions>
+</clickhouse>
+```
+
+#### Basic configuration for a single ClickHouse server node with ClickHouse Keeper enabled
+
+:::note
+See the [deployment](/docs/en/deployment-guides/terminology.md) documentation for details on deploying ClickHouse server and a proper quorum of ClickHouse Keeper nodes. The configuration shown here is for experimental purposes.
+:::
+
+```xml title=/etc/clickhouse-server/config.d/config.xml
+<clickhouse>
+    <logger>
+        <level>debug</level>
+        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
+        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
+        <size>1000M</size>
+        <count>3</count>
+    </logger>
+    <display_name>node 1</display_name>
+    <listen_host>0.0.0.0</listen_host>
+    <http_port>8123</http_port>
+    <tcp_port>9000</tcp_port>
+    <zookeeper>
+        <node>
+            <host>clickhouse-01</host>
+            <port>9181</port>
+        </node>
+    </zookeeper>
+    <keeper_server>
+        <tcp_port>9181</tcp_port>
+        <server_id>1</server_id>
+        <log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
+        <snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
+        <coordination_settings>
+            <operation_timeout_ms>10000</operation_timeout_ms>
+            <session_timeout_ms>30000</session_timeout_ms>
+            <raft_logs_level>information</raft_logs_level>
+        </coordination_settings>
+        <raft_configuration>
+            <server>
+                <id>1</id>
+                <hostname>clickhouse-keeper-01</hostname>
+                <port>9234</port>
+            </server>
+        </raft_configuration>
+    </keeper_server>
+</clickhouse>
+```
+
+### Example
+
+#### Verify that experimental transactions are enabled
+
+Issue a `BEGIN TRANSACTION` followed by a `ROLLBACK` to verify that experimental transactions are enabled and that ClickHouse Keeper is running, as it is used to track transactions.
+
+```sql
+BEGIN TRANSACTION
+```
+```response
+Ok.
+```
+
+:::tip
+If you see the following error, then check your configuration file to make sure that `allow_experimental_transactions` is set to `1` (or any value other than `0` or `false`).
+```
+Code: 48. DB::Exception: Received from localhost:9000.
+DB::Exception: Transactions are not supported.
+(NOT_IMPLEMENTED)
+```
+
+You can also check ClickHouse Keeper by issuing
+```
+echo ruok | nc localhost 9181
+```
+ClickHouse Keeper should respond with `imok`.
+:::
+
+```sql
+ROLLBACK
+```
+```response
+Ok.
+```
+
+#### Create a table for testing
+
+:::tip
+Creation of tables is not transactional. Run this DDL query outside of a transaction.
+:::
+
+```sql
+CREATE TABLE mergetree_table
+(
+    `n` Int64
+)
+ENGINE = MergeTree
+ORDER BY n
+```
+```response
+Ok.
+```
+
+#### Begin a transaction and insert a row
+
+```sql
+BEGIN TRANSACTION
+```
+```response
+Ok.
+```
+
+```sql
+INSERT INTO mergetree_table FORMAT Values (10)
+```
+```response
+Ok.
+```
+
+```sql
+SELECT *
+FROM mergetree_table
+```
+```response
+┌──n─┐
+│ 10 │
+└────┘
+```
+:::note
+You can query the table from within a transaction and see that the row was inserted even though it has not yet been committed.
+:::
+
+#### Roll back the transaction, and query the table again
+
+Verify that the transaction is rolled back:
+```sql
+ROLLBACK
+```
+```response
+Ok.
+```
+```sql
+SELECT *
+FROM mergetree_table
+```
+```response
+Ok.
+
+0 rows in set. Elapsed: 0.002 sec.
+```
+
+#### Complete a transaction and query the table again
+
+```sql
+BEGIN TRANSACTION
+```
+```response
+Ok.
+```
+
+```sql
+INSERT INTO mergetree_table FORMAT Values (42)
+```
+```response
+Ok.
+```
+
+```sql
+COMMIT
+```
+```response
+Ok. Elapsed: 0.002 sec.
+```
+
+```sql
+SELECT *
+FROM mergetree_table
+```
+```response
+┌──n─┐
+│ 42 │
+└────┘
+```
+
+### Transactions introspection
+
+You can inspect transactions by querying the `system.transactions` table, but note that you cannot query that
+table from a session that is in a transaction. Open a second `clickhouse client` session to query that table.
+
+```sql
+SELECT *
+FROM system.transactions
+FORMAT Vertical
+```
+```response
+Row 1:
+──────
+tid:         (33,61,'51e60bce-6b82-4732-9e1d-b40705ae9ab8')
+tid_hash:    11240433987908122467
+elapsed:     210.017820947
+is_readonly: 1
+state:       RUNNING
+```
+
+## More Details
+
+See this [meta issue](https://github.com/ClickHouse/ClickHouse/issues/48794) to find much more extensive tests and to keep up to date with the progress.
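As a worked example of the note above that any exception poisons a transaction: a minimal sketch, assuming the `mergetree_table` created in the examples, with `throwIf` used only to simulate a failing statement.

```sql
BEGIN TRANSACTION;

INSERT INTO mergetree_table VALUES (1);

-- Any exception raised inside the transaction, even a deliberate one,
-- means the transaction can no longer be committed.
SELECT throwIf(1, 'simulated failure');

-- A subsequent COMMIT is rejected by the server; ROLLBACK is the only
-- way to finish this session's transaction and start a new one.
ROLLBACK;
```

After the `ROLLBACK`, the inserted row is gone, matching the atomicity described at the top of the document.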
+ diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 59d49830852..bc0bf03e5d4 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -140,8 +140,8 @@ ORDER BY │ 1 │ 1 │ 1 │ [1,2,3] │ <┐ │ 1 │ 2 │ 2 │ [1,2,3] │ │ 1-st group │ 1 │ 3 │ 3 │ [1,2,3] │ <┘ -│ 2 │ 0 │ 0 │ [0] │ <- 2-nd group -│ 3 │ 0 │ 0 │ [0] │ <- 3-d group +│ 2 │ 0 │ 0 │ [0] │ <- 2-nd group +│ 3 │ 0 │ 0 │ [0] │ <- 3-d group └──────────┴───────┴───────┴──────────────┘ ``` diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 80472178ae2..7294bc2ae87 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -41,9 +41,15 @@ ClickHouse не работает и не собирается на 32-битны Выполните в терминале: - git clone git@github.com:your_github_username/ClickHouse.git --recursive + git clone --shallow-submodules git@github.com:your_github_username/ClickHouse.git cd ClickHouse +Или (если вы хотите использовать sparse checkout для submodules): + + git clone git@github.com:your_github_username/ClickHouse.git + cd ClickHouse + ./contrib/update-submodules.sh + Замените слово `your_github_username` в команде для git на имя вашего аккаунта на GitHub. Эта команда создаст директорию ClickHouse, содержащую рабочую копию проекта. diff --git a/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md index 7be933d67d7..50434419651 100644 --- a/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md +++ b/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md @@ -7,7 +7,7 @@ sidebar_position: 141 Суммирует разницу между последовательными строками. Если разница отрицательна — она будет проигнорирована. -Эта функция предназначена в первую очередь для [материализованных представлений](../../../sql-reference/statements/create/view.md#materialized), упорядоченных по некоторому временному бакету согласно timestamp, например, по бакету `toStartOfMinute`. Поскольку строки в таком материализованном представлении будут иметь одинаковый timestamp, невозможно объединить их в "правом" порядке. Функция отслеживает `timestamp` наблюдаемых значений, поэтому возможно правильно упорядочить состояния во время слияния. +Эта функция предназначена в первую очередь для [материализованных представлений](../../../sql-reference/statements/create/view.md#materialized), хранящих данные, упорядоченные по некоторому округленному временному интервалу, согласно timestamp, например, по бакету `toStartOfMinute`. Поскольку строки в таком материализованном представлении будут иметь одинаковый timestamp, их невозможно объединить в правильном порядке без хранения исходного, неокругленного значения timestamp. Функция `deltaSumTimestamp` отслеживает исходные `timestamp` наблюдаемых значений, поэтому значения (состояния) функции правильно вычисляются во время слияния кусков. Чтобы вычислить разницу между упорядоченными последовательными строками, вы можете использовать функцию [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) вместо функции `deltaSumTimestamp`. 
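The `deltaSumTimestamp` paragraph above describes the usual materialized-view pattern with time buckets. A hedged sketch of that pattern using the standard -State/-Merge combinators; the `events` table and `events_per_minute` view names are hypothetical, not part of the patch:

```sql
-- Raw events arrive with exact timestamps.
CREATE TABLE events
(
    ts    DateTime,
    value UInt64
)
ENGINE = MergeTree
ORDER BY ts;

-- The view buckets rows by minute; within one bucket the rounded
-- timestamp is identical, so deltaSumTimestamp keeps the original ts
-- to order partial states correctly when parts are merged.
CREATE MATERIALIZED VIEW events_per_minute
ENGINE = AggregatingMergeTree
ORDER BY minute
AS SELECT
    toStartOfMinute(ts) AS minute,
    deltaSumTimestampState(value, ts) AS delta
FROM events
GROUP BY minute;

-- Read back with the -Merge combinator.
SELECT
    minute,
    deltaSumTimestampMerge(delta) AS delta
FROM events_per_minute
GROUP BY minute
ORDER BY minute;
```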
diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md
index bb2716b2741..c43323d68fd 100644
--- a/docs/ru/sql-reference/functions/array-functions.md
+++ b/docs/ru/sql-reference/functions/array-functions.md
@@ -695,7 +695,11 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res;
 :::note "Примечание"
 Для улучшения эффективности сортировки применяется [преобразование Шварца](https://ru.wikipedia.org/wiki/%D0%9F%D1%80%D0%B5%D0%BE%D0%B1%D1%80%D0%B0%D0%B7%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_%D0%A8%D0%B2%D0%B0%D1%80%D1%86%D0%B0).
 :::
-
+
+## arrayPartialSort(\[func,\] limit, arr, …) {#array_functions-partial-sort}
+
+То же, что и `arraySort`, но с дополнительным аргументом `limit`, позволяющим частичную сортировку. Возвращает массив того же размера, что и исходный, в котором элементы `[1..limit]` отсортированы в возрастающем порядке. Остальные элементы `(limit..N]` остаются в неспецифицированном порядке.
+
 ## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort}
 
 Возвращает массив `arr`, отсортированный в нисходящем порядке. Если указана функция `func`, то массив `arr` сначала сортируется в порядке, который определяется функцией `func`, а затем отсортированный массив переворачивается. Если функция `func` принимает несколько аргументов, то в функцию `arrayReverseSort` необходимо передавать несколько массивов, которые будут соответствовать аргументам функции `func`. Подробные примеры рассмотрены в конце описания функции `arrayReverseSort`.
@@ -797,6 +801,10 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res;
 └─────────┘
 ```
 
+## arrayPartialReverseSort(\[func,\] limit, arr, …) {#array_functions-partial-reverse-sort}
+
+То же, что и `arrayReverseSort`, но с дополнительным аргументом `limit`, позволяющим частичную сортировку. Возвращает массив того же размера, что и исходный, в котором элементы `[1..limit]` отсортированы в убывающем порядке. Остальные элементы `(limit..N]` остаются в неспецифицированном порядке.
+
 ## arrayUniq(arr, …) {#array-functions-arrayuniq}
 
 Если передан один аргумент, считает количество разных элементов в массиве.
diff --git a/docs/ru/sql-reference/statements/grant.md b/docs/ru/sql-reference/statements/grant.md
index 73c63850750..9b8fafabfcc 100644
--- a/docs/ru/sql-reference/statements/grant.md
+++ b/docs/ru/sql-reference/statements/grant.md
@@ -37,6 +37,19 @@ GRANT [ON CLUSTER cluster_name] role [,...] TO {user | another_role | CURRENT_US
 `WITH ADMIN OPTION` присваивает привилегию [ADMIN OPTION](#admin-option-privilege) пользователю или роли.
 `WITH REPLACE OPTION` заменяет все старые роли новыми ролями для пользователя `user` или `role`, если не указано, добавляет новые роли.
+
+## Синтаксис присвоения текущих привилегий {#grant-current-grants-syntax}
+
+```sql
+GRANT CURRENT GRANTS{(privilege[(column_name [,...])] [,...] ON {db.table|db.*|*.*|table|*}) | ON {db.table|db.*|*.*|table|*}} TO {user | role | CURRENT_USER} [,...] [WITH GRANT OPTION] [WITH REPLACE OPTION]
+```
+
+- `privilege` — Тип привилегии.
+- `role` — Роль пользователя ClickHouse.
+- `user` — Пользователь ClickHouse.
+
+Использование выражения `CURRENT GRANTS` позволяет присвоить все указанные и доступные для присвоения привилегии.
+Если список привилегий не задан, то указанный пользователь или роль получат все доступные привилегии для `CURRENT_USER`.
+
 ## Использование {#grant-usage}
 
 Для использования `GRANT` пользователь должен иметь привилегию `GRANT OPTION`.
 Пользователь может выдавать привилегии только внутри области действия назначенных ему самому привилегий.
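To make the `CURRENT GRANTS` grammar above concrete, a short sketch in the documented syntax; the user and database names are hypothetical:

```sql
-- Pass on every privilege the current user is itself allowed to grant.
GRANT CURRENT GRANTS ON *.* TO junior_analyst;

-- Pass on only the grantable privileges that cover one database.
GRANT CURRENT GRANTS(SELECT, INSERT ON mydb.*) TO junior_analyst WITH GRANT OPTION;
```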
diff --git a/docs/zh/sql-reference/functions/string-functions.md b/docs/zh/sql-reference/functions/string-functions.md
index 89d64f76074..d1914839d7c 100644
--- a/docs/zh/sql-reference/functions/string-functions.md
+++ b/docs/zh/sql-reference/functions/string-functions.md
@@ -168,3 +168,15 @@ SELECT format('{} {}', 'Hello', 'World')
 ## trimBoth(s) {#trimboths}
 
 返回一个字符串,用于删除任一侧的空白字符。
+
+## soundex(s)
+
+返回一个字符串的soundex值。输出类型是FixedString,示例如下:
+
+```sql
+SELECT soundex('aksel');
+
+┌─soundex('aksel')─┐
+│ A240             │
+└──────────────────┘
+```
diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 660b8d7c00a..df0abceb8c6 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -277,11 +277,11 @@ void Client::initialize(Poco::Util::Application & self)
      */
     const char * env_user = getenv("CLICKHOUSE_USER"); // NOLINT(concurrency-mt-unsafe)
-    if (env_user)
+    if (env_user && !config().has("user"))
         config().setString("user", env_user);
 
     const char * env_password = getenv("CLICKHOUSE_PASSWORD"); // NOLINT(concurrency-mt-unsafe)
-    if (env_password)
+    if (env_password && !config().has("password"))
         config().setString("password", env_password);
 
     parseConnectionsCredentials();
diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp
index 079c70596a6..efe7121cace 100644
--- a/programs/copier/ClusterCopier.cpp
+++ b/programs/copier/ClusterCopier.cpp
@@ -1773,7 +1773,7 @@ String ClusterCopier::getRemoteCreateTable(const DatabaseAndTableName & table, C
     QueryPipelineBuilder builder;
     builder.init(Pipe(std::make_shared<RemoteSource>(
-        std::make_shared<RemoteQueryExecutor>(connection, query, InterpreterShowCreateQuery::getSampleBlock(), remote_context), false, false)));
+        std::make_shared<RemoteQueryExecutor>(connection, query, InterpreterShowCreateQuery::getSampleBlock(), remote_context), false, false, /* async_query_sending= */ false)));
     Block block = getBlockWithAllStreamData(std::move(builder));
     return typeid_cast<const ColumnString &>(*block.safeGetByPosition(0).column).getDataAt(0).toString();
 }
diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h
index b3c9936cd33..48f4b0fab09 100644
--- a/programs/copier/Internals.h
+++ b/programs/copier/Internals.h
@@ -25,7 +25,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/programs/copier/TaskTable.cpp b/programs/copier/TaskTable.cpp
index 451a33a1c02..d055ceb4c7b 100644
--- a/programs/copier/TaskTable.cpp
+++ b/programs/copier/TaskTable.cpp
@@ -4,9 +4,11 @@
 #include "TaskCluster.h"
 
 #include
+#include
 #include
+
 
 namespace DB
 {
 namespace ErrorCodes
diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp
index b142159fbdf..d83e189f7ef 100644
--- a/programs/install/Install.cpp
+++ b/programs/install/Install.cpp
@@ -375,15 +375,22 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
 
     try
     {
-        ReadBufferFromFile in(binary_self_path.string());
-        WriteBufferFromFile out(main_bin_tmp_path.string());
-        copyData(in, out);
-        out.sync();
+        String source = binary_self_path.string();
+        String destination = main_bin_tmp_path.string();
 
-        if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
+        /// Try to make a hard link first, as an optimization.
+        /// It is possible if the source and the destination are on the same filesystem.
+        if (0 != link(source.c_str(), destination.c_str()))
+        {
+            ReadBufferFromFile in(binary_self_path.string());
+            WriteBufferFromFile out(main_bin_tmp_path.string());
+            copyData(in, out);
+            out.sync();
+            out.finalize();
+        }
+
+        if (0 != chmod(destination.c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
             throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
-
-        out.finalize();
     }
     catch (const Exception & e)
     {
diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index 266b363eb47..3853c955171 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -17,7 +17,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 9ef9f704f61..8c0d50bae55 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -981,7 +981,7 @@ try
     StatusFile status{path / "status", StatusFile::write_full_info};
 
-    DB::ServerUUID::load(path / "uuid", log);
+    ServerUUID::load(path / "uuid", log);
 
     /// Try to increase limit on number of open files.
     {
@@ -1192,12 +1192,12 @@ try
             {
                 Settings::checkNoSettingNamesAtTopLevel(*config, config_path);
 
-                ServerSettings server_settings;
-                server_settings.loadSettingsFromConfig(*config);
+                ServerSettings server_settings_;
+                server_settings_.loadSettingsFromConfig(*config);
 
-                size_t max_server_memory_usage = server_settings.max_server_memory_usage;
+                size_t max_server_memory_usage = server_settings_.max_server_memory_usage;
 
-                double max_server_memory_usage_to_ram_ratio = server_settings.max_server_memory_usage_to_ram_ratio;
+                double max_server_memory_usage_to_ram_ratio = server_settings_.max_server_memory_usage_to_ram_ratio;
                 size_t default_max_server_memory_usage = static_cast<size_t>(memory_amount * max_server_memory_usage_to_ram_ratio);
 
                 if (max_server_memory_usage == 0)
@@ -1225,7 +1225,7 @@ try
                 total_memory_tracker.setDescription("(total)");
                 total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
 
-                total_memory_tracker.setAllowUseJemallocMemory(server_settings.allow_use_jemalloc_memory);
+                total_memory_tracker.setAllowUseJemallocMemory(server_settings_.allow_use_jemalloc_memory);
 
                 auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker();
                 total_memory_tracker.setOvercommitTracker(global_overcommit_tracker);
@@ -1243,23 +1243,23 @@ try
                 global_context->setRemoteHostFilter(*config);
 
-                global_context->setMaxTableSizeToDrop(server_settings.max_table_size_to_drop);
-                global_context->setMaxPartitionSizeToDrop(server_settings.max_partition_size_to_drop);
+                global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
+                global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);
 
                 ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited;
-                if (server_settings.concurrent_threads_soft_limit_num > 0 && server_settings.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
-                    concurrent_threads_soft_limit = server_settings.concurrent_threads_soft_limit_num;
-                if (server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0)
+                if (server_settings_.concurrent_threads_soft_limit_num > 0 && server_settings_.concurrent_threads_soft_limit_num < concurrent_threads_soft_limit)
+                    concurrent_threads_soft_limit = server_settings_.concurrent_threads_soft_limit_num;
+                if (server_settings_.concurrent_threads_soft_limit_ratio_to_cores > 0)
                 {
-                    auto value = server_settings.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency();
+                    auto value = server_settings_.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency();
                     if (value > 0 && value < concurrent_threads_soft_limit)
                         concurrent_threads_soft_limit = value;
                 }
                 ConcurrencyControl::instance().setMaxConcurrency(concurrent_threads_soft_limit);
 
-                global_context->getProcessList().setMaxSize(server_settings.max_concurrent_queries);
-                global_context->getProcessList().setMaxInsertQueriesAmount(server_settings.max_concurrent_insert_queries);
-                global_context->getProcessList().setMaxSelectQueriesAmount(server_settings.max_concurrent_select_queries);
+                global_context->getProcessList().setMaxSize(server_settings_.max_concurrent_queries);
+                global_context->getProcessList().setMaxInsertQueriesAmount(server_settings_.max_concurrent_insert_queries);
+                global_context->getProcessList().setMaxSelectQueriesAmount(server_settings_.max_concurrent_select_queries);
 
                 if (config->has("keeper_server"))
                     global_context->updateKeeperConfiguration(*config);
@@ -1270,34 +1270,34 @@ try
                 /// This is done for backward compatibility.
                 if (global_context->areBackgroundExecutorsInitialized())
                 {
-                    auto new_pool_size = server_settings.background_pool_size;
-                    auto new_ratio = server_settings.background_merges_mutations_concurrency_ratio;
+                    auto new_pool_size = server_settings_.background_pool_size;
+                    auto new_ratio = server_settings_.background_merges_mutations_concurrency_ratio;
                     global_context->getMergeMutateExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, static_cast<size_t>(new_pool_size * new_ratio));
-                    global_context->getMergeMutateExecutor()->updateSchedulingPolicy(server_settings.background_merges_mutations_scheduling_policy.toString());
+                    global_context->getMergeMutateExecutor()->updateSchedulingPolicy(server_settings_.background_merges_mutations_scheduling_policy.toString());
                 }
 
                 if (global_context->areBackgroundExecutorsInitialized())
                 {
-                    auto new_pool_size = server_settings.background_move_pool_size;
+                    auto new_pool_size = server_settings_.background_move_pool_size;
                     global_context->getMovesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
                 }
 
                 if (global_context->areBackgroundExecutorsInitialized())
                 {
-                    auto new_pool_size = server_settings.background_fetches_pool_size;
+                    auto new_pool_size = server_settings_.background_fetches_pool_size;
                     global_context->getFetchesExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
                 }
 
                 if (global_context->areBackgroundExecutorsInitialized())
                 {
-                    auto new_pool_size = server_settings.background_common_pool_size;
+                    auto new_pool_size = server_settings_.background_common_pool_size;
                     global_context->getCommonExecutor()->increaseThreadsAndMaxTasksCount(new_pool_size, new_pool_size);
                 }
 
-                global_context->getBufferFlushSchedulePool().increaseThreadsCount(server_settings.background_buffer_flush_schedule_pool_size);
-                global_context->getSchedulePool().increaseThreadsCount(server_settings.background_schedule_pool_size);
-                global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings.background_message_broker_schedule_pool_size);
-                global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings.background_distributed_schedule_pool_size);
+                global_context->getBufferFlushSchedulePool().increaseThreadsCount(server_settings_.background_buffer_flush_schedule_pool_size);
+                global_context->getSchedulePool().increaseThreadsCount(server_settings_.background_schedule_pool_size);
+                global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings_.background_message_broker_schedule_pool_size);
+                global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings_.background_distributed_schedule_pool_size);
 
                 if (config->has("resources"))
                 {
diff --git a/programs/server/config.xml b/programs/server/config.xml
index cfcd2ff93e0..1aeda624db2 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -1293,7 +1293,7 @@
diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html
index fa940e01ad5..97b35ec97c4 100644
--- a/programs/server/dashboard.html
+++ b/programs/server/dashboard.html
@@ -12,7 +12,7 @@
     --chart-background: white;
     --shadow-color: rgba(0, 0, 0, 0.25);
     --input-shadow-color: rgba(0, 255, 0, 1);
-    --error-color: red;
+    --error-color: white;
     --legend-background: rgba(255, 255, 255, 0.75);
     --title-color: #666;
     --text-color: black;
@@ -76,7 +76,7 @@
     #charts
     {
         height: 100%;
-        display: none;
+        display: flex;
         flex-flow: row wrap;
         gap: 1rem;
     }
@@ -121,6 +121,19 @@
     .unconnected #url {
         width: 100%;
     }
+    .unconnected #button-options {
+        display: grid;
+        grid-auto-flow: column;
+        grid-auto-columns: 1fr;
+        gap: 0.3rem;
+    }
+    .unconnected #user {
+        margin-right: 0;
+        width: auto;
+    }
+    .unconnected #password {
+        width: auto;
+    }
     #user {
         margin-right: 0.25rem;
         width: 50%;
@@ -136,7 +149,15 @@
         width: 100%;
 
         display: flex;
-        flex-flow: row nowrap;
+        flex-flow: row nowrap;
+    }
+    .unconnected #username-password {
+        width: 100%;
+
+        gap: 0.3rem;
+
+        display: grid;
+        grid-template-columns: 1fr 1fr;
     }
     .inputs #chart-params {
@@ -177,7 +198,10 @@
     .themes {
         float: right;
         font-size: 20pt;
-        margin-bottom: 1rem;
+        gap: 0.3rem;
+
+        display: flex;
+        justify-content: center;
     }
 
     #toggle-dark, #toggle-light {
@@ -206,6 +230,8 @@
     }
 
     #add, #reload {
+        padding: .25rem 0.5rem;
+        text-align: center;
         font-weight: bold;
         user-select: none;
         cursor: pointer;
@@ -214,16 +240,24 @@
         background: var(--new-chart-background-color);
         color: var(--new-chart-text-color);
         float: right;
-        margin-right: 0 !important;
-        margin-left: 1rem;
+        margin-right: 1rem !important;
+        margin-left: 0rem;
         margin-bottom: 1rem;
     }
 
+    /* .unconnected #reload {
+        margin-left: 3px;
+    } */
+
     #add:hover, #reload:hover {
         background: var(--button-background-color);
     }
 
     #auth-error {
+        align-self: center;
+        width: 60%;
+        padding: .5rem;
+
         color: var(--error-color);
         display: flex;
@@ -352,15 +386,15 @@