Merge branch 'master' into system_disks_unreserved_space

Commit 0bfbb5c38a by Kseniia Sumarokova, 2022-10-03 14:25:47 +02:00, committed by GitHub.
795 changed files with 18327 additions and 7238 deletions


@@ -887,6 +887,51 @@ jobs:
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinAarch64V80Compat:
    needs: [DockerHubPush]
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/build_check
          IMAGES_PATH=${{runner.temp}}/images_path
          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
          CACHES_PATH=${{runner.temp}}/../ccaches
          BUILD_NAME=binary_aarch64_v80compat
          EOF
      - name: Download changed images
        uses: actions/download-artifact@v2
        with:
          name: changed_images
          path: ${{ env.IMAGES_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
        with:
          fetch-depth: 0 # otherwise we will have no info about contributors
      - name: Build
        run: |
          git -C "$GITHUB_WORKSPACE" submodule sync --recursive
          git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
      - name: Upload build URLs to artifacts
        if: ${{ success() || failure() }}
        uses: actions/upload-artifact@v2
        with:
          name: ${{ env.BUILD_URLS }}
          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
@@ -972,6 +1017,7 @@ jobs:
      # - BuilderBinGCC
      - BuilderBinPPC64
      - BuilderBinAmd64SSE2
      - BuilderBinAarch64V80Compat
      - BuilderBinClangTidy
      - BuilderDebShared
    runs-on: [self-hosted, style-checker]


@@ -940,6 +940,49 @@ jobs:
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  BuilderBinAarch64V80Compat:
    needs: [DockerHubPush, FastTest, StyleCheck]
    runs-on: [self-hosted, builder]
    steps:
      - name: Set envs
        run: |
          cat >> "$GITHUB_ENV" << 'EOF'
          TEMP_PATH=${{runner.temp}}/build_check
          IMAGES_PATH=${{runner.temp}}/images_path
          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
          CACHES_PATH=${{runner.temp}}/../ccaches
          BUILD_NAME=binary_aarch64_v80compat
          EOF
      - name: Download changed images
        uses: actions/download-artifact@v2
        with:
          name: changed_images
          path: ${{ env.IMAGES_PATH }}
      - name: Clear repository
        run: |
          sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Build
        run: |
          git -C "$GITHUB_WORKSPACE" submodule sync --recursive
          git -C "$GITHUB_WORKSPACE" submodule update --depth=1 --recursive --init --jobs=10
          sudo rm -fr "$TEMP_PATH"
          mkdir -p "$TEMP_PATH"
          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
      - name: Upload build URLs to artifacts
        if: ${{ success() || failure() }}
        uses: actions/upload-artifact@v2
        with:
          name: ${{ env.BUILD_URLS }}
          path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json
      - name: Cleanup
        if: always()
        run: |
          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
############################################################################################
##################################### Docker images #######################################
############################################################################################
@@ -1025,6 +1068,7 @@ jobs:
      # - BuilderBinGCC
      - BuilderBinPPC64
      - BuilderBinAmd64SSE2
      - BuilderBinAarch64V80Compat
      - BuilderBinClangTidy
      - BuilderDebShared
    runs-on: [self-hosted, style-checker]

.gitmodules

@@ -30,9 +30,6 @@
 [submodule "contrib/re2"]
 	path = contrib/re2
 	url = https://github.com/google/re2.git
-[submodule "contrib/llvm"]
-	path = contrib/llvm
-	url = https://github.com/ClickHouse/llvm
 [submodule "contrib/mariadb-connector-c"]
 	path = contrib/mariadb-connector-c
 	url = https://github.com/ClickHouse/mariadb-connector-c.git
@@ -259,6 +256,10 @@
 [submodule "contrib/minizip-ng"]
 	path = contrib/minizip-ng
 	url = https://github.com/zlib-ng/minizip-ng
+[submodule "contrib/annoy"]
+	path = contrib/annoy
+	url = https://github.com/ClickHouse/annoy.git
+	branch = ClickHouse-master
 [submodule "contrib/qpl"]
 	path = contrib/qpl
 	url = https://github.com/intel/qpl.git
@@ -280,3 +281,6 @@
 [submodule "contrib/c-ares"]
 	path = contrib/c-ares
 	url = https://github.com/ClickHouse/c-ares
+[submodule "contrib/llvm-project"]
+	path = contrib/llvm-project
+	url = https://github.com/ClickHouse/llvm-project.git


@@ -1,4 +1,5 @@
### Table of Contents
**[ClickHouse release v22.9, 2022-09-22](#229)**<br/>
**[ClickHouse release v22.8, 2022-08-18](#228)**<br/>
**[ClickHouse release v22.7, 2022-07-21](#227)**<br/>
**[ClickHouse release v22.6, 2022-06-16](#226)**<br/>
@@ -10,6 +11,213 @@
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**<br/>
### <a id="229"></a> ClickHouse release 22.9, 2022-09-22
#### Backward Incompatible Change
* Upgrade from 20.3 and older to 22.9 and newer should be done through an intermediate version if there are any `ReplicatedMergeTree` tables, otherwise server with the new version will not start. [#40641](https://github.com/ClickHouse/ClickHouse/pull/40641) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Remove the functions `accurate_Cast` and `accurate_CastOrNull` (they differ from `accurateCast` and `accurateCastOrNull` by the underscore in their names and are not affected by the value of the `cast_keep_nullable` setting). These functions were undocumented, untested, unused, and unneeded. They appeared to be alive due to code generalization. A sketch of the surviving functions follows this list. [#40682](https://github.com/ClickHouse/ClickHouse/pull/40682) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a test to ensure that every new table function will be documented. See [#40649](https://github.com/ClickHouse/ClickHouse/issues/40649). Rename table function `MeiliSearch` to `meilisearch`. [#40709](https://github.com/ClickHouse/ClickHouse/pull/40709) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a test to ensure that every new function will be documented. See [#40649](https://github.com/ClickHouse/ClickHouse/pull/40649). The functions `lemmatize`, `synonyms`, `stem` were case-insensitive by mistake. Now they are case-sensitive. [#40711](https://github.com/ClickHouse/ClickHouse/pull/40711) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Make the interpretation of YAML configs more conventional. [#41044](https://github.com/ClickHouse/ClickHouse/pull/41044) ([Vitaly Baranov](https://github.com/vitlibar)).
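As a quick sketch of the surviving cast functions from the item above (queries assume 22.9 or newer; the result comments are assumptions based on the documented behavior):

```sql
-- accurateCast throws if the value cannot be represented in the target type:
SELECT accurateCast(-1, 'UInt8');       -- throws an exception
-- accurateCastOrNull returns NULL instead of throwing:
SELECT accurateCastOrNull(-1, 'UInt8'); -- returns NULL
```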
#### New Feature
* Support `insert_quorum = 'auto'` to use the majority number of replicas as the quorum. [#39970](https://github.com/ClickHouse/ClickHouse/pull/39970) ([Sachin](https://github.com/SachinSetiya)).
* Add embedded dashboards to ClickHouse server. This is a demo project about how to achieve 90% results with 1% effort using ClickHouse features. [#40461](https://github.com/ClickHouse/ClickHouse/pull/40461) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Added a new settings-constraint writability kind: `changeable_in_readonly`. [#40631](https://github.com/ClickHouse/ClickHouse/pull/40631) ([Sergei Trifonov](https://github.com/serxa)).
* Add support for `INTERSECT DISTINCT` and `EXCEPT DISTINCT`; see the sketch after this list. [#40792](https://github.com/ClickHouse/ClickHouse/pull/40792) ([Duc Canh Le](https://github.com/canhld94)).
* Add a new input/output format `JSONObjectEachRow`. Support import for the formats `JSON/JSONCompact/JSONColumnsWithMetadata`. Add a new setting `input_format_json_validate_types_from_metadata` that controls whether to check that the data types from the metadata match the data types from the header. Add a new setting `input_format_json_validate_utf8`: when enabled, all `JSON` formats validate UTF-8 sequences; it is disabled by default. Note that this setting doesn't influence the output formats `JSON/JSONCompact/JSONColumnsWithMetadata`, which always validate UTF-8 sequences (this exception was made for compatibility reasons). Add a new setting `input_format_json_read_numbers_as_strings` that allows parsing numbers into `String` columns (disabled by default). Add a new setting `output_format_json_quote_decimals` that allows outputting decimals in double quotes (disabled by default). Allow parsing decimals in double quotes during data import. [#40910](https://github.com/ClickHouse/ClickHouse/pull/40910) ([Kruglov Pavel](https://github.com/Avogar)).
* Query parameters supported in DESCRIBE TABLE query. [#40952](https://github.com/ClickHouse/ClickHouse/pull/40952) ([Nikita Taranov](https://github.com/nickitat)).
* Add support for Parquet Time32/64 by converting it into DateTime64. Parquet Time32/64 represents time elapsed since midnight, while DateTime32/64 represents an actual Unix timestamp. The conversion simply offsets from `0`. [#41333](https://github.com/ClickHouse/ClickHouse/pull/41333) ([Arthur Passos](https://github.com/arthurpassos)).
* Implement set operations on Apache DataSketches. [#39919](https://github.com/ClickHouse/ClickHouse/pull/39919) ([Fangyuan Deng](https://github.com/pzhdfy)). Note: there is no point in using Apache DataSketches, they are inferior to ClickHouse and only make sense for integration with other systems.
* Allow recording errors to a specified file while reading text formats (`CSV`, `TSV`). [#40516](https://github.com/ClickHouse/ClickHouse/pull/40516) ([zjial](https://github.com/zjial)).
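A minimal sketch of the new `INTERSECT DISTINCT`/`EXCEPT DISTINCT` operators from the list above (the plain forms behave as `ALL` and keep duplicates; the `DISTINCT` variants deduplicate the result):

```sql
-- numbers(5) yields 0..4 and numbers(3, 5) yields 3..7, so this returns 3 and 4 once each:
SELECT number FROM numbers(5)
INTERSECT DISTINCT
SELECT number FROM numbers(3, 5);

-- returns 2, 3, 4:
SELECT number FROM numbers(5)
EXCEPT DISTINCT
SELECT number FROM numbers(2);
```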
#### Experimental Feature
* Add ANN (approximate nearest neighbor) index based on `Annoy`. [#40818](https://github.com/ClickHouse/ClickHouse/pull/40818) ([Filatenkov Artur](https://github.com/FArthur-cmd)). [#37215](https://github.com/ClickHouse/ClickHouse/pull/37215) ([VVMak](https://github.com/VVMak)).
* Add new storage engine `KeeperMap`, that uses ClickHouse Keeper or ZooKeeper as a key-value store. [#39976](https://github.com/ClickHouse/ClickHouse/pull/39976) ([Antonio Andelic](https://github.com/antonio2368)). This storage engine is intended to store a small amount of metadata.
* Improvement for in-memory data parts: remove completely processed WAL files. [#40592](https://github.com/ClickHouse/ClickHouse/pull/40592) ([Azat Khuzhin](https://github.com/azat)).
#### Performance Improvement
* Implement compression of marks and primary key. Close [#34437](https://github.com/ClickHouse/ClickHouse/issues/34437). [#37693](https://github.com/ClickHouse/ClickHouse/pull/37693) ([zhongyuankai](https://github.com/zhongyuankai)).
* Allow to load marks with threadpool in advance. Regulated by setting `load_marks_asynchronously` (default: 0). [#40821](https://github.com/ClickHouse/ClickHouse/pull/40821) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Virtual filesystem over s3 will use random object names split into multiple path prefixes for better performance on AWS. [#40968](https://github.com/ClickHouse/ClickHouse/pull/40968) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Account for the `max_block_size` value while producing single-level aggregation results. This allows the following query plan steps to execute using more threads. [#39138](https://github.com/ClickHouse/ClickHouse/pull/39138) ([Nikita Taranov](https://github.com/nickitat)).
* Software prefetching is used in aggregation to speed up operations with hash tables. Controlled by the setting `enable_software_prefetch_in_aggregation`, enabled by default. [#39304](https://github.com/ClickHouse/ClickHouse/pull/39304) ([Nikita Taranov](https://github.com/nickitat)).
* Better support for `optimize_read_in_order` in the case when some of the sorting key columns are always constant after applying the `WHERE` clause, e.g. a query like `SELECT ... FROM table WHERE a = 'x' ORDER BY a, b`, where `table` has the storage definition `MergeTree ORDER BY (a, b)`; see the sketch after this list. [#38715](https://github.com/ClickHouse/ClickHouse/pull/38715) ([Anton Popov](https://github.com/CurtizJ)).
* Filter joined streams for `full_sorting_join` by each other before sorting. [#39418](https://github.com/ClickHouse/ClickHouse/pull/39418) ([Vladimir C](https://github.com/vdimir)).
* LZ4 decompression optimised by skipping empty literals processing. [#40142](https://github.com/ClickHouse/ClickHouse/pull/40142) ([Nikita Taranov](https://github.com/nickitat)).
* Speedup backup process using native `copy` when possible instead of copying through `clickhouse-server` memory. [#40395](https://github.com/ClickHouse/ClickHouse/pull/40395) ([alesapin](https://github.com/alesapin)).
* Do not obtain storage snapshot for each INSERT block (slightly improves performance). [#40638](https://github.com/ClickHouse/ClickHouse/pull/40638) ([Azat Khuzhin](https://github.com/azat)).
* Implement batch processing for aggregate functions with multiple nullable arguments. [#41058](https://github.com/ClickHouse/ClickHouse/pull/41058) ([Raúl Marín](https://github.com/Algunenano)).
* Speed up reading UniquesHashSet (`uniqState` from disk for example). [#41089](https://github.com/ClickHouse/ClickHouse/pull/41089) ([Raúl Marín](https://github.com/Algunenano)).
* Fixed high memory usage while executing mutations of compact parts in tables with huge number of columns. [#41122](https://github.com/ClickHouse/ClickHouse/pull/41122) ([lthaooo](https://github.com/lthaooo)).
* Enable the vectorscan library on ARM; this speeds up regexp evaluation. [#41033](https://github.com/ClickHouse/ClickHouse/pull/41033) ([Robert Schulze](https://github.com/rschu1ze)).
* Upgrade vectorscan to 5.4.8 which has many performance optimizations to speed up regexp evaluation. [#41270](https://github.com/ClickHouse/ClickHouse/pull/41270) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix incorrect fallback to skip the local filesystem cache for VFS (like S3) which happened on very high concurrency level. [#40420](https://github.com/ClickHouse/ClickHouse/pull/40420) ([Kseniia Sumarokova](https://github.com/kssenii)).
* If row policy filter is always false, return empty result immediately without reading any data. This closes [#24012](https://github.com/ClickHouse/ClickHouse/issues/24012). [#40740](https://github.com/ClickHouse/ClickHouse/pull/40740) ([Amos Bird](https://github.com/amosbird)).
* Parallel hash JOIN for Float data types might be suboptimal. Make it better. [#41183](https://github.com/ClickHouse/ClickHouse/pull/41183) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
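A sketch of the `optimize_read_in_order` improvement described above, with a hypothetical table:

```sql
CREATE TABLE events (a String, b UInt32, v UInt64)
ENGINE = MergeTree ORDER BY (a, b);

-- `a` is constant under the WHERE condition, so sorting by (a, b) degenerates
-- to sorting by `b` alone and the data can be read in storage order:
SELECT v FROM events WHERE a = 'x' ORDER BY a, b LIMIT 10;
```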
#### Improvement
* During startup and ATTACH call, `ReplicatedMergeTree` tables will be readonly until the ZooKeeper connection is made and the setup is finished. [#40148](https://github.com/ClickHouse/ClickHouse/pull/40148) ([Antonio Andelic](https://github.com/antonio2368)).
* Add the `enable_extended_results_for_datetime_functions` option to return results of type `Date32` from the functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toMonday` and `toLastDayOfMonth` when the argument is `Date32` or `DateTime64`; otherwise results of type `Date` are returned. For compatibility reasons, the default value is 0. [#41214](https://github.com/ClickHouse/ClickHouse/pull/41214) ([Roman Vasin](https://github.com/rvasin)).
* For security and stability reasons, CatBoost models are no longer evaluated within the ClickHouse server. Instead, the evaluation is now done in the clickhouse-library-bridge, a separate process that loads the catboost library and communicates with the server process via HTTP. [#40897](https://github.com/ClickHouse/ClickHouse/pull/40897) ([Robert Schulze](https://github.com/rschu1ze)). [#39629](https://github.com/ClickHouse/ClickHouse/pull/39629) ([Robert Schulze](https://github.com/rschu1ze)).
* Add more metrics for on-disk temporary data, close [#40206](https://github.com/ClickHouse/ClickHouse/issues/40206). [#40239](https://github.com/ClickHouse/ClickHouse/pull/40239) ([Vladimir C](https://github.com/vdimir)).
* Add config option `warning_supress_regexp`, close [#40330](https://github.com/ClickHouse/ClickHouse/issues/40330). [#40548](https://github.com/ClickHouse/ClickHouse/pull/40548) ([Vladimir C](https://github.com/vdimir)).
* Add setting to disable limit on kafka_num_consumers. Closes [#40331](https://github.com/ClickHouse/ClickHouse/issues/40331). [#40670](https://github.com/ClickHouse/ClickHouse/pull/40670) ([Kruglov Pavel](https://github.com/Avogar)).
* Support `SETTINGS` in `DELETE ...` query. [#41533](https://github.com/ClickHouse/ClickHouse/pull/41533) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add detailed `DiskS3*` profile events, split per S3 API call, for S3 ObjectStorage. [#41532](https://github.com/ClickHouse/ClickHouse/pull/41532) ([Sergei Trifonov](https://github.com/serxa)).
* Add two new metrics to `system.asynchronous_metrics`: `NumberOfDetachedParts` and `NumberOfDetachedByUserParts`. [#40779](https://github.com/ClickHouse/ClickHouse/pull/40779) ([Sema Checherinda](https://github.com/CheSema)).
* Allow CONSTRAINTs for ODBC and JDBC tables. [#34551](https://github.com/ClickHouse/ClickHouse/pull/34551) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Don't print `SETTINGS` more than once during query formatting if it didn't appear multiple times in the original query. [#38900](https://github.com/ClickHouse/ClickHouse/pull/38900) ([Raúl Marín](https://github.com/Algunenano)).
* Improve the tracing (OpenTelemetry) context propagation across threads. [#39010](https://github.com/ClickHouse/ClickHouse/pull/39010) ([Frank Chen](https://github.com/FrankChen021)).
* ClickHouse Keeper: add listeners for `interserver_listen_host` only in Keeper if specified. [#39973](https://github.com/ClickHouse/ClickHouse/pull/39973) ([Antonio Andelic](https://github.com/antonio2368)).
* Improve recovery of Replicated user access storage after errors. [#39977](https://github.com/ClickHouse/ClickHouse/pull/39977) ([Vitaly Baranov](https://github.com/vitlibar)).
* Add support for TTL in `EmbeddedRocksDB`. [#39986](https://github.com/ClickHouse/ClickHouse/pull/39986) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)).
* Add schema inference to `clickhouse-obfuscator`, so the `--structure` argument is no longer required. [#40120](https://github.com/ClickHouse/ClickHouse/pull/40120) ([Nikolay Degterinsky](https://github.com/evillique)).
* Improve and fix dictionaries in `Arrow` format. [#40173](https://github.com/ClickHouse/ClickHouse/pull/40173) ([Kruglov Pavel](https://github.com/Avogar)).
* More natural conversion of `Date32`, `DateTime64`, `Date` to narrower types: values out of the normal range are saturated to the upper or lower bound of that range. [#40217](https://github.com/ClickHouse/ClickHouse/pull/40217) ([Andrey Zvonov](https://github.com/zvonand)).
* Fix the case when `Merge` table over `View` cannot use index. [#40233](https://github.com/ClickHouse/ClickHouse/pull/40233) ([Duc Canh Le](https://github.com/canhld94)).
* Custom key names for JSON server logs. [#40251](https://github.com/ClickHouse/ClickHouse/pull/40251) ([Mallik Hassan](https://github.com/SadiHassan)).
* It is now possible to set a custom error code for the exception thrown by function `throwIf`. [#40319](https://github.com/ClickHouse/ClickHouse/pull/40319) ([Robert Schulze](https://github.com/rschu1ze)).
* Improve schema inference cache, respect format settings that can change the schema. [#40414](https://github.com/ClickHouse/ClickHouse/pull/40414) ([Kruglov Pavel](https://github.com/Avogar)).
* Allow parsing `Date` as `DateTime` and `DateTime64`. This implements the enhancement proposed in [#36949](https://github.com/ClickHouse/ClickHouse/issues/36949). [#40474](https://github.com/ClickHouse/ClickHouse/pull/40474) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow conversion from `String` with `DateTime64` like `2022-08-22 01:02:03.456` to `Date` and `Date32`. Allow conversion from String with DateTime like `2022-08-22 01:02:03` to `Date32`. This closes [#39598](https://github.com/ClickHouse/ClickHouse/issues/39598). [#40475](https://github.com/ClickHouse/ClickHouse/pull/40475) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Better support for nested data structures in Parquet format [#40485](https://github.com/ClickHouse/ClickHouse/pull/40485) ([Arthur Passos](https://github.com/arthurpassos)).
* Support reading `Array(Record)` into a flattened `Nested` table in Avro. [#40534](https://github.com/ClickHouse/ClickHouse/pull/40534) ([Kruglov Pavel](https://github.com/Avogar)).
* Add read-only support for `EmbeddedRocksDB`. [#40543](https://github.com/ClickHouse/ClickHouse/pull/40543) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)).
* Validate the compression method parameter of URL table engine. [#40600](https://github.com/ClickHouse/ClickHouse/pull/40600) ([Frank Chen](https://github.com/FrankChen021)).
* Better format detection for url table function/engine in presence of a query string after a file name. Closes [#40315](https://github.com/ClickHouse/ClickHouse/issues/40315). [#40636](https://github.com/ClickHouse/ClickHouse/pull/40636) ([Kruglov Pavel](https://github.com/Avogar)).
* Disable projection when grouping set is used. It generated wrong result. This fixes [#40635](https://github.com/ClickHouse/ClickHouse/issues/40635). [#40726](https://github.com/ClickHouse/ClickHouse/pull/40726) ([Amos Bird](https://github.com/amosbird)).
* Fix incorrect format of `APPLY` column transformer which can break metadata if used in table definition. This fixes [#37590](https://github.com/ClickHouse/ClickHouse/issues/37590). [#40727](https://github.com/ClickHouse/ClickHouse/pull/40727) ([Amos Bird](https://github.com/amosbird)).
* Support the `%z` descriptor for formatting the timezone offset in `formatDateTime`; see the sketch after this list. [#40736](https://github.com/ClickHouse/ClickHouse/pull/40736) ([Cory Levy](https://github.com/LevyCory)).
* The interactive mode in `clickhouse-client` now interprets `.` and `/` as "run the last command". [#40750](https://github.com/ClickHouse/ClickHouse/pull/40750) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix issue with passing MySQL timeouts for MySQL database engine and MySQL table function. Closes [#34168](https://github.com/ClickHouse/ClickHouse/issues/34168). [#40751](https://github.com/ClickHouse/ClickHouse/pull/40751) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Create status file for filesystem cache directory to make sure that cache directories are not shared between different servers or caches. [#40820](https://github.com/ClickHouse/ClickHouse/pull/40820) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add support for `DELETE` and `UPDATE` for `EmbeddedRocksDB` storage. [#40853](https://github.com/ClickHouse/ClickHouse/pull/40853) ([Antonio Andelic](https://github.com/antonio2368)).
* ClickHouse Keeper: fix shutdown during long commit and increase allowed request size. [#40941](https://github.com/ClickHouse/ClickHouse/pull/40941) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix race in WriteBufferFromS3, add TSA annotations. [#40950](https://github.com/ClickHouse/ClickHouse/pull/40950) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Grouping sets with group_by_use_nulls should only convert key columns to nullable. [#40997](https://github.com/ClickHouse/ClickHouse/pull/40997) ([Duc Canh Le](https://github.com/canhld94)).
* Improve the observability of INSERT on distributed table. [#41034](https://github.com/ClickHouse/ClickHouse/pull/41034) ([Frank Chen](https://github.com/FrankChen021)).
* More low-level metrics for S3 interaction. [#41039](https://github.com/ClickHouse/ClickHouse/pull/41039) ([mateng915](https://github.com/mateng0915)).
* Support relative path in Location header after HTTP redirect. Closes [#40985](https://github.com/ClickHouse/ClickHouse/issues/40985). [#41162](https://github.com/ClickHouse/ClickHouse/pull/41162) ([Kruglov Pavel](https://github.com/Avogar)).
* Apply changes to HTTP handlers on the fly, without a server restart. [#41177](https://github.com/ClickHouse/ClickHouse/pull/41177) ([Azat Khuzhin](https://github.com/azat)).
* ClickHouse Keeper: properly close active sessions during shutdown. [#41215](https://github.com/ClickHouse/ClickHouse/pull/41215) ([Antonio Andelic](https://github.com/antonio2368)). This lowers the period of "table is read-only" errors.
* Add ability to automatically comment SQL queries in clickhouse-client/local (with `Alt-#`, like in readline). [#41224](https://github.com/ClickHouse/ClickHouse/pull/41224) ([Azat Khuzhin](https://github.com/azat)).
* Fix incompatibility of the cache after switching the setting `do_no_evict_index_and_mark_files` from 1 to 0 or from 0 to 1. [#41330](https://github.com/ClickHouse/ClickHouse/pull/41330) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add a setting `allow_suspicious_fixed_string_types` to prevent users from creating columns of type FixedString with size > 256. [#41495](https://github.com/ClickHouse/ClickHouse/pull/41495) ([Duc Canh Le](https://github.com/canhld94)).
* Add `has_lightweight_delete` to system.parts. [#41564](https://github.com/ClickHouse/ClickHouse/pull/41564) ([Kseniia Sumarokova](https://github.com/kssenii)).
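A sketch of the new `%z` descriptor mentioned above (the exact output depends on the timezone; the result comment is an assumption):

```sql
-- %z formats the timezone offset, e.g. +0200 for CEST:
SELECT formatDateTime(toDateTime('2022-09-22 12:00:00', 'Europe/Amsterdam'), '%F %T %z');
-- 2022-09-22 12:00:00 +0200
```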
#### Build/Testing/Packaging Improvement
* Enforce documentation for every setting. [#40644](https://github.com/ClickHouse/ClickHouse/pull/40644) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Enforce documentation for every current metric. [#40645](https://github.com/ClickHouse/ClickHouse/pull/40645) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Enforce documentation for every profile event counter. Write the documentation where it was missing. [#40646](https://github.com/ClickHouse/ClickHouse/pull/40646) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow minimal `clickhouse-local` build by correcting some dependencies. [#40460](https://github.com/ClickHouse/ClickHouse/pull/40460) ([Alexey Milovidov](https://github.com/alexey-milovidov)). It is less than 50 MiB.
* Calculate and report SQL function coverage in tests. [#40593](https://github.com/ClickHouse/ClickHouse/issues/40593). [#40647](https://github.com/ClickHouse/ClickHouse/pull/40647) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Enforce documentation for every MergeTree setting. [#40648](https://github.com/ClickHouse/ClickHouse/pull/40648) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* A prototype of embedded reference documentation for high-level uniform server components. [#40649](https://github.com/ClickHouse/ClickHouse/pull/40649) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* We will check all queries from the changed perf tests to ensure that all changed queries were tested. [#40322](https://github.com/ClickHouse/ClickHouse/pull/40322) ([Nikita Taranov](https://github.com/nickitat)).
* Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix debug symbols. [#40873](https://github.com/ClickHouse/ClickHouse/pull/40873) ([Azat Khuzhin](https://github.com/azat)).
* Extended the CI configuration to create an x86 SSE2-only build. Useful for old or embedded hardware. [#40999](https://github.com/ClickHouse/ClickHouse/pull/40999) ([Robert Schulze](https://github.com/rschu1ze)).
* Switch to llvm/clang 15. [#41046](https://github.com/ClickHouse/ClickHouse/pull/41046) ([Azat Khuzhin](https://github.com/azat)).
* Continuation of [#40938](https://github.com/ClickHouse/ClickHouse/issues/40938). Fix ODR violation for `Loggers` class. Fixes [#40398](https://github.com/ClickHouse/ClickHouse/issues/40398), [#40937](https://github.com/ClickHouse/ClickHouse/issues/40937). [#41060](https://github.com/ClickHouse/ClickHouse/pull/41060) ([Dmitry Novik](https://github.com/novikd)).
* Add macOS binaries to GitHub release assets, it fixes [#37718](https://github.com/ClickHouse/ClickHouse/issues/37718). [#41088](https://github.com/ClickHouse/ClickHouse/pull/41088) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* The c-ares library is now bundled with ClickHouse's build system. [#41239](https://github.com/ClickHouse/ClickHouse/pull/41239) ([Robert Schulze](https://github.com/rschu1ze)).
* Get rid of `dlopen` from the main ClickHouse code. It remains in the library-bridge and odbc-bridge. [#41428](https://github.com/ClickHouse/ClickHouse/pull/41428) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Don't allow `dlopen` in the main ClickHouse binary, because it is harmful and insecure. We don't use it. But it can be used by some libraries for the implementation of "plugins". We absolutely discourage the ancient technique of loading 3rd-party uncontrolled dangerous libraries into the process address space, because it is insane. [#41429](https://github.com/ClickHouse/ClickHouse/pull/41429) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Support for DWARF-5 in the in-house DWARF parser. [#40710](https://github.com/ClickHouse/ClickHouse/pull/40710) ([Azat Khuzhin](https://github.com/azat)).
* Add fault injection in ZooKeeper client for testing [#30498](https://github.com/ClickHouse/ClickHouse/pull/30498) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add stateless tests with s3 storage with debug and tsan [#35262](https://github.com/ClickHouse/ClickHouse/pull/35262) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Trying stress on top of S3 [#36837](https://github.com/ClickHouse/ClickHouse/pull/36837) ([alesapin](https://github.com/alesapin)).
* Enable `concurrency-mt-unsafe` in `clang-tidy` [#40224](https://github.com/ClickHouse/ClickHouse/pull/40224) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Bug Fix
* Fix potential data loss due to [a bug in the AWS SDK](https://github.com/aws/aws-sdk-cpp/issues/658). The bug can be triggered only when ClickHouse is used over S3. [#40506](https://github.com/ClickHouse/ClickHouse/pull/40506) ([alesapin](https://github.com/alesapin)). This bug had been open for 5 years in the AWS SDK and was closed after our report.
* Malicious data in Native format might cause a crash. [#41441](https://github.com/ClickHouse/ClickHouse/pull/41441) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The aggregate function `categorialInformationValue` had incorrectly defined properties, which could cause a null-pointer dereference at runtime. This closes [#41443](https://github.com/ClickHouse/ClickHouse/issues/41443). [#41449](https://github.com/ClickHouse/ClickHouse/pull/41449) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix bugs in MergeJoin when 'not_processed' is not null. [#40335](https://github.com/ClickHouse/ClickHouse/pull/40335) ([liql2007](https://github.com/liql2007)).
* Fix incorrect result in case of decimal precision loss in IN operator, ref [#41125](https://github.com/ClickHouse/ClickHouse/issues/41125). [#41130](https://github.com/ClickHouse/ClickHouse/pull/41130) ([Vladimir C](https://github.com/vdimir)).
* Fix filling of missed `Nested` columns with multiple levels. [#37152](https://github.com/ClickHouse/ClickHouse/pull/37152) ([Anton Popov](https://github.com/CurtizJ)).
* Fix SYSTEM UNFREEZE query for Ordinary (deprecated) database. Fix for https://github.com/ClickHouse/ClickHouse/pull/36424. [#38262](https://github.com/ClickHouse/ClickHouse/pull/38262) ([Vadim Volodin](https://github.com/PolyProgrammist)).
* Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)).
* Fix query analysis for ORDER BY in presence of window functions. Fixes [#38741](https://github.com/ClickHouse/ClickHouse/issues/38741) Fixes [#24892](https://github.com/ClickHouse/ClickHouse/issues/24892). [#39354](https://github.com/ClickHouse/ClickHouse/pull/39354) ([Dmitry Novik](https://github.com/novikd)).
* Fixed `Unknown identifier (aggregate-function)` exception which appears when a user tries to calculate WINDOW ORDER BY/PARTITION BY expressions over aggregate functions. [#39762](https://github.com/ClickHouse/ClickHouse/pull/39762) ([Vladimir Chebotaryov](https://github.com/quickhouse)).
* Limit the number of analysis passes for a single query with the setting `max_analyze_depth`. This prevents an exponential blowup of analysis time for queries with an extraordinarily large number of subqueries. [#40334](https://github.com/ClickHouse/ClickHouse/pull/40334) ([Vladimir C](https://github.com/vdimir)).
* Fix rare bug with column TTL for MergeTree engines family: In case of repeated vertical merge the error `Cannot unlink file ColumnName.bin ... No such file or directory.` could happen. [#40346](https://github.com/ClickHouse/ClickHouse/pull/40346) ([alesapin](https://github.com/alesapin)).
* Use DNS entries for both IPv4 and IPv6 if present. [#40353](https://github.com/ClickHouse/ClickHouse/pull/40353) ([Maksim Kita](https://github.com/kitaisreal)).
* Allow to read snappy compressed files from Hadoop. [#40482](https://github.com/ClickHouse/ClickHouse/pull/40482) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix a crash while parsing values of type `Object` (an experimental feature) that contain arrays of varying dimensions. [#40483](https://github.com/ClickHouse/ClickHouse/pull/40483) ([Duc Canh Le](https://github.com/canhld94)).
* Fix settings `input_format_tsv_skip_first_lines`. [#40491](https://github.com/ClickHouse/ClickHouse/pull/40491) ([mini4](https://github.com/mini4)).
* Fix bug (race condition) when starting up MaterializedPostgreSQL database/table engine. [#40262](https://github.com/ClickHouse/ClickHouse/issues/40262). Fix error with reaching limit of relcache_callback_list slots. [#40511](https://github.com/ClickHouse/ClickHouse/pull/40511) ([Maksim Buren](https://github.com/maks-buren630501)).
* Fix possible error 'Decimal math overflow' while parsing DateTime64. [#40546](https://github.com/ClickHouse/ClickHouse/pull/40546) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix vertical merge of parts with lightweight deleted rows. [#40559](https://github.com/ClickHouse/ClickHouse/pull/40559) ([Alexander Gololobov](https://github.com/davenger)).
* Fix a segmentation fault when writing data to the URL table engine with compression enabled. [#40565](https://github.com/ClickHouse/ClickHouse/pull/40565) ([Frank Chen](https://github.com/FrankChen021)).
* Fix possible logical error `'Invalid Field get from type UInt64 to type String'` in arrayElement function with Map. [#40572](https://github.com/ClickHouse/ClickHouse/pull/40572) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible race in filesystem cache. [#40586](https://github.com/ClickHouse/ClickHouse/pull/40586) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Removed skipping of mutations in unaffected partitions of `MergeTree` tables, because this feature never worked correctly and might cause resurrection of finished mutations. [#40589](https://github.com/ClickHouse/ClickHouse/pull/40589) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix a crash of the ClickHouse server that occurred when a gRPC port that was already occupied was added to the configuration at runtime. [#40597](https://github.com/ClickHouse/ClickHouse/pull/40597) ([何李夫](https://github.com/helifu)).
* Fix `base58Encode / base58Decode` handling leading 0 / '1'. [#40620](https://github.com/ClickHouse/ClickHouse/pull/40620) ([Andrey Zvonov](https://github.com/zvonand)).
* ClickHouse Keeper: fix a race in accessing logs while a snapshot is being installed. [#40627](https://github.com/ClickHouse/ClickHouse/pull/40627) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix short circuit execution of toFixedString function. Solves (partially) [#40622](https://github.com/ClickHouse/ClickHouse/issues/40622). [#40628](https://github.com/ClickHouse/ClickHouse/pull/40628) ([Kruglov Pavel](https://github.com/Avogar)).
* Fixes SQLite int8 column conversion to int64 column in ClickHouse. Fixes [#40639](https://github.com/ClickHouse/ClickHouse/issues/40639). [#40642](https://github.com/ClickHouse/ClickHouse/pull/40642) ([Barum Rho](https://github.com/barumrho)).
* Fix stack overflow in recursive `Buffer` tables. This closes [#40637](https://github.com/ClickHouse/ClickHouse/issues/40637). [#40643](https://github.com/ClickHouse/ClickHouse/pull/40643) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* During insertion of a new query to the `ProcessList` allocations happen. If we reach the memory limit during these allocations we can not use `OvercommitTracker`, because `ProcessList::mutex` is already acquired. Fixes [#40611](https://github.com/ClickHouse/ClickHouse/issues/40611). [#40677](https://github.com/ClickHouse/ClickHouse/pull/40677) ([Dmitry Novik](https://github.com/novikd)).
* Fix LOGICAL_ERROR with max_read_buffer_size=0 during reading marks. [#40705](https://github.com/ClickHouse/ClickHouse/pull/40705) ([Azat Khuzhin](https://github.com/azat)).
* Fix memory leak while pushing to MVs w/o query context (from Kafka/...). [#40732](https://github.com/ClickHouse/ClickHouse/pull/40732) ([Azat Khuzhin](https://github.com/azat)).
* Fix possible error Attempt to read after eof in CSV schema inference. [#40746](https://github.com/ClickHouse/ClickHouse/pull/40746) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix logical error in write-through cache "File segment completion can be done only by downloader". Closes [#40748](https://github.com/ClickHouse/ClickHouse/issues/40748). [#40759](https://github.com/ClickHouse/ClickHouse/pull/40759) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Make the result of GROUPING function the same as in SQL and other DBMS. [#40762](https://github.com/ClickHouse/ClickHouse/pull/40762) ([Dmitry Novik](https://github.com/novikd)).
* In [#40595](https://github.com/ClickHouse/ClickHouse/issues/40595) it was reported that the `host_regexp` functionality was not working properly with a name to address resolution in `/etc/hosts`. It's fixed. [#40769](https://github.com/ClickHouse/ClickHouse/pull/40769) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix incremental backups for Log family. [#40827](https://github.com/ClickHouse/ClickHouse/pull/40827) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix extremely rare bug which can lead to potential data loss in zero-copy replication. [#40844](https://github.com/ClickHouse/ClickHouse/pull/40844) ([alesapin](https://github.com/alesapin)).
* Fix key condition analyzing crashes when same set expression built from different column(s). [#40850](https://github.com/ClickHouse/ClickHouse/pull/40850) ([Duc Canh Le](https://github.com/canhld94)).
* Fix nested JSON Objects schema inference. [#40851](https://github.com/ClickHouse/ClickHouse/pull/40851) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix 3-digit prefix directory for filesystem cache files not being deleted if empty. Closes [#40797](https://github.com/ClickHouse/ClickHouse/issues/40797). [#40867](https://github.com/ClickHouse/ClickHouse/pull/40867) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix uncaught DNS_ERROR on failed connection to replicas. [#40881](https://github.com/ClickHouse/ClickHouse/pull/40881) ([Robert Coelho](https://github.com/coelho)).
* Fix bug when removing unneeded columns in subquery. [#40884](https://github.com/ClickHouse/ClickHouse/pull/40884) ([luocongkai](https://github.com/TKaxe)).
* Fix extra memory allocation for remote read buffers. [#40896](https://github.com/ClickHouse/ClickHouse/pull/40896) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fixed a behavior where a user with an explicitly revoked grant for dropping databases could still drop them. [#40906](https://github.com/ClickHouse/ClickHouse/pull/40906) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* A fix for ClickHouse Keeper: correctly compare paths in write requests to Keeper internal system node paths. [#40918](https://github.com/ClickHouse/ClickHouse/pull/40918) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix deadlock in WriteBufferFromS3. [#40943](https://github.com/ClickHouse/ClickHouse/pull/40943) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix access rights for `DESCRIBE TABLE url()` and some other `DESCRIBE TABLE <table_function>()`. [#40975](https://github.com/ClickHouse/ClickHouse/pull/40975) ([Vitaly Baranov](https://github.com/vitlibar)).
* Remove wrong parser logic for `WITH GROUPING SETS` which may lead to nullptr dereference. [#41049](https://github.com/ClickHouse/ClickHouse/pull/41049) ([Duc Canh Le](https://github.com/canhld94)).
* A fix for ClickHouse Keeper: fix possible segfault during Keeper shutdown. [#41075](https://github.com/ClickHouse/ClickHouse/pull/41075) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix possible segfaults, heap-use-after-free, and a memory leak in aggregate function combinators. Closes [#40848](https://github.com/ClickHouse/ClickHouse/issues/40848). [#41083](https://github.com/ClickHouse/ClickHouse/pull/41083) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix query_views_log with Window views. [#41132](https://github.com/ClickHouse/ClickHouse/pull/41132) ([Raúl Marín](https://github.com/Algunenano)).
* Disable `optimize_monotonous_functions_in_order_by` by default; mitigates [#40094](https://github.com/ClickHouse/ClickHouse/issues/40094). [#41136](https://github.com/ClickHouse/ClickHouse/pull/41136) ([Denny Crane](https://github.com/den-crane)).
* Fixed "possible deadlock avoided" error on automatic conversion of database engine from Ordinary to Atomic. [#41146](https://github.com/ClickHouse/ClickHouse/pull/41146) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix SIGSEGV in SortedBlocksWriter in case of empty block (possible to get with `optimize_aggregation_in_order` and `join_algorithm=auto`). [#41154](https://github.com/ClickHouse/ClickHouse/pull/41154) ([Azat Khuzhin](https://github.com/azat)).
* Fix incorrect query result when trivial count optimization is in effect with array join. This fixes [#39431](https://github.com/ClickHouse/ClickHouse/issues/39431). [#41158](https://github.com/ClickHouse/ClickHouse/pull/41158) ([Denny Crane](https://github.com/den-crane)).
* Fix stack-use-after-return in GetPriorityForLoadBalancing::getPriorityFunc(). [#41159](https://github.com/ClickHouse/ClickHouse/pull/41159) ([Azat Khuzhin](https://github.com/azat)).
* Fix positional arguments exception Positional argument out of bounds. Closes [#40634](https://github.com/ClickHouse/ClickHouse/issues/40634). [#41189](https://github.com/ClickHouse/ClickHouse/pull/41189) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix background clean up of broken detached parts. [#41190](https://github.com/ClickHouse/ClickHouse/pull/41190) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix exponential query rewrite in case of lots of cross joins with where, close [#21557](https://github.com/ClickHouse/ClickHouse/issues/21557). [#41223](https://github.com/ClickHouse/ClickHouse/pull/41223) ([Vladimir C](https://github.com/vdimir)).
* Fix possible logical error in write-through cache, which happened because not all types of exception were handled as needed. Closes [#41208](https://github.com/ClickHouse/ClickHouse/issues/41208). [#41232](https://github.com/ClickHouse/ClickHouse/pull/41232) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix String log entry in system.filesystem_cache_log. [#41233](https://github.com/ClickHouse/ClickHouse/pull/41233) ([jmimbrero](https://github.com/josemimbrero-tinybird)).
* Queries with `OFFSET` clause in subquery and `WHERE` clause in outer query might return incorrect result, it's fixed. Fixes [#40416](https://github.com/ClickHouse/ClickHouse/issues/40416). [#41280](https://github.com/ClickHouse/ClickHouse/pull/41280) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix possible wrong query result with `query_plan_optimize_primary_key` enabled. Fixes [#40599](https://github.com/ClickHouse/ClickHouse/issues/40599). [#41281](https://github.com/ClickHouse/ClickHouse/pull/41281) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Do not allow invalid sequences to influence other rows in `lowerUTF8`/`upperUTF8`. [#41286](https://github.com/ClickHouse/ClickHouse/pull/41286) ([Azat Khuzhin](https://github.com/azat)).
* Fix `ALTER <table> ADD COLUMN` queries with columns of type `Object`. [#41290](https://github.com/ClickHouse/ClickHouse/pull/41290) ([Anton Popov](https://github.com/CurtizJ)).
* Fixed "No node" error when selecting from `system.distributed_ddl_queue` when there's no `distributed_ddl.path` in config. Fixes [#41096](https://github.com/ClickHouse/ClickHouse/issues/41096). [#41296](https://github.com/ClickHouse/ClickHouse/pull/41296) ([young scott](https://github.com/young-scott)).
* Fix incorrect logical error `Expected relative path` in disk object storage. Related to [#41246](https://github.com/ClickHouse/ClickHouse/issues/41246). [#41297](https://github.com/ClickHouse/ClickHouse/pull/41297) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add column type check before UUID insertion in MsgPack format. [#41309](https://github.com/ClickHouse/ClickHouse/pull/41309) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix possible crash after inserting asynchronously (with enabled setting `async_insert`) malformed data to columns of type `Object`. It could happen, if JSONs in all batches of async inserts were invalid and could not be parsed. [#41336](https://github.com/ClickHouse/ClickHouse/pull/41336) ([Anton Popov](https://github.com/CurtizJ)).
* Fix possible deadlock with async_socket_for_remote/use_hedged_requests and parallel KILL. [#41343](https://github.com/ClickHouse/ClickHouse/pull/41343) ([Azat Khuzhin](https://github.com/azat)).
* Disable `optimize_rewrite_sum_if_to_count_if` by default; mitigates [#38605](https://github.com/ClickHouse/ClickHouse/issues/38605) and [#38683](https://github.com/ClickHouse/ClickHouse/issues/38683). [#41388](https://github.com/ClickHouse/ClickHouse/pull/41388) ([Denny Crane](https://github.com/den-crane)).
* Since 22.8, the `ON CLUSTER` clause was ignored if the database was `Replicated` and the cluster name and database name were the same. Because of this, `DROP PARTITION ON CLUSTER` worked in an unexpected way with `Replicated` databases. It's fixed: now the `ON CLUSTER` clause is ignored only for queries that are replicated on the database level. Fixes [#41299](https://github.com/ClickHouse/ClickHouse/issues/41299). [#41390](https://github.com/ClickHouse/ClickHouse/pull/41390) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix possible hung/deadlock on query cancellation (`KILL QUERY` or server shutdown). [#41467](https://github.com/ClickHouse/ClickHouse/pull/41467) ([Azat Khuzhin](https://github.com/azat)).
* Fix possible server crash when using the JBOD feature. This fixes [#41365](https://github.com/ClickHouse/ClickHouse/issues/41365). [#41483](https://github.com/ClickHouse/ClickHouse/pull/41483) ([Amos Bird](https://github.com/amosbird)).
* Fix conversion from nullable fixed string to string. [#41541](https://github.com/ClickHouse/ClickHouse/pull/41541) ([Duc Canh Le](https://github.com/canhld94)).
* Prevent crash when passing wrong aggregation states to groupBitmap*. [#41563](https://github.com/ClickHouse/ClickHouse/pull/41563) ([Raúl Marín](https://github.com/Algunenano)).
* Queries with `ORDER BY` and `1500 <= LIMIT <= max_block_size` could return incorrect result with missing rows from top. Fixes [#41182](https://github.com/ClickHouse/ClickHouse/issues/41182). [#41576](https://github.com/ClickHouse/ClickHouse/pull/41576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix read bytes/rows in X-ClickHouse-Summary with materialized views. [#41586](https://github.com/ClickHouse/ClickHouse/pull/41586) ([Raúl Marín](https://github.com/Algunenano)).
* Fix possible `pipeline stuck` exception for queries with `OFFSET`. The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). [#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
### <a id="228"></a> ClickHouse release 22.8, 2022-08-18
#### Backward Incompatible Change


@@ -18,7 +18,7 @@ include (cmake/target.cmake)
 include (cmake/tools.cmake)
 include (cmake/ccache.cmake)
 include (cmake/clang_tidy.cmake)
-include (cmake/git_status.cmake)
+include (cmake/git.cmake)
 
 # Ignore export() since we don't use it,
 # but it gets broken with a global targets via link_libraries()

@@ -1,75 +0,0 @@
#pragma once

#include <map>
#include <tuple>
#include <mutex>
#include "FnTraits.h"

/**
 * Caching proxy for a functor that decays to a pointer-to-function.
 * Saves pairs (func args, func result on args).
 * Cache size is unlimited. Cache items are evicted only on manual drop.
 * Invocation/update is O(log(saved cache values)).
 *
 * See Common/tests/cached_fn.cpp for examples.
 */
template <auto * Func>
struct CachedFn
{
private:
    using Traits = FnTraits<decltype(Func)>;
    using DecayedArgs = TypeListMap<std::decay_t, typename Traits::Args>;
    using Key = TypeListChangeRoot<std::tuple, DecayedArgs>;
    using Result = typename Traits::Ret;

    std::map<Key, Result> cache; // Can't use hashmap as tuples are unhashable by default
    mutable std::mutex mutex;

public:
    template <class ...Args>
    Result operator()(Args && ...args)
    {
        Key key{std::forward<Args>(args)...};

        {
            std::lock_guard lock(mutex);
            if (auto it = cache.find(key); it != cache.end())
                return it->second;
        }

        Result res = std::apply(Func, key);

        {
            std::lock_guard lock(mutex);
            cache.emplace(std::move(key), res);
        }

        return res;
    }

    template <class ...Args>
    void update(Args && ...args)
    {
        Key key{std::forward<Args>(args)...};
        Result res = std::apply(Func, key);

        {
            std::lock_guard lock(mutex);
            // TODO Can't use emplace(std::move(key), ..), causes test_host_ip_change errors.
            cache[key] = std::move(res);
        }
    }

    size_t size() const
    {
        std::lock_guard lock(mutex);
        return cache.size();
    }

    void drop()
    {
        std::lock_guard lock(mutex);
        cache.clear();
    }
};


@@ -1,6 +1,7 @@
#include <base/ReplxxLineReader.h>
#include <base/errnoToString.h>
#include <stdexcept>
#include <chrono>
#include <cerrno>
#include <cstring>
@@ -13,8 +14,10 @@
#include <dlfcn.h>
#include <fcntl.h>
#include <fstream>
#include <filesystem>
#include <fmt/format.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/classification.hpp> /// is_any_of
namespace
{
@@ -35,6 +38,166 @@ std::string getEditor()
    return editor;
}

std::string getFuzzyFinder()
{
    const char * env_path = std::getenv("PATH"); // NOLINT(concurrency-mt-unsafe)
    if (!env_path || !*env_path)
        return {};

    std::vector<std::string> paths;
    boost::split(paths, env_path, boost::is_any_of(":"));
    for (const auto & path_str : paths)
    {
        std::filesystem::path path(path_str);
        std::filesystem::path sk_bin_path = path / "sk";
        if (!access(sk_bin_path.c_str(), X_OK))
            return sk_bin_path;

        std::filesystem::path fzf_bin_path = path / "fzf";
        if (!access(fzf_bin_path.c_str(), X_OK))
            return fzf_bin_path;
    }

    return {};
}

/// See comments in ShellCommand::executeImpl()
/// (for the vfork via dlsym())
int executeCommand(char * const argv[])
{
#if !defined(USE_MUSL)
    /** Here it is written that with a normal call `vfork`, there is a chance of deadlock in multithreaded programs,
      * because of the resolving of symbols in the shared library
      * http://www.oracle.com/technetwork/server-storage/solaris10/subprocess-136439.html
      * Therefore, separate the resolving of the symbol from the call.
      */
    static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork");
#else
    /// If we use Musl with static linking, there is no dlsym and no issue with vfork.
    static void * real_vfork = reinterpret_cast<void *>(&vfork);
#endif

    if (!real_vfork)
        throw std::runtime_error("Cannot find vfork symbol");

    pid_t pid = reinterpret_cast<pid_t (*)()>(real_vfork)();

    if (-1 == pid)
        throw std::runtime_error(fmt::format("Cannot vfork {}: {}", argv[0], errnoToString()));

    /// Child
    if (0 == pid)
    {
        sigset_t mask;
        sigemptyset(&mask);
        sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process
        sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process

        execvp(argv[0], argv);
        _exit(-1);
    }

    int status = 0;
    do
    {
        int exited_pid = waitpid(pid, &status, 0);
        if (exited_pid != -1)
            break;

        if (errno == EINTR)
            continue;

        throw std::runtime_error(fmt::format("Cannot waitpid {}: {}", pid, errnoToString()));
    } while (true);

    return status;
}

void writeRetry(int fd, const std::string & data)
{
    size_t bytes_written = 0;
    const char * begin = data.c_str();
    size_t offset = data.size();

    while (bytes_written != offset)
    {
        ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written);
        if ((-1 == res || 0 == res) && errno != EINTR)
            throw std::runtime_error(fmt::format("Cannot write to {}: {}", fd, errnoToString()));
        bytes_written += res;
    }
}

std::string readFile(const std::string & path)
{
    std::ifstream t(path);
    std::string str;
    t.seekg(0, std::ios::end);
    str.reserve(t.tellg());
    t.seekg(0, std::ios::beg);
    str.assign((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
    return str;
}

/// Simple wrapper for temporary files.
class TemporaryFile
{
private:
    std::string path;
    int fd = -1;

public:
    explicit TemporaryFile(const char * pattern)
        : path(pattern)
    {
        size_t dot_pos = path.rfind('.');
        if (dot_pos != std::string::npos)
            fd = ::mkstemps(path.data(), path.size() - dot_pos);
        else
            fd = ::mkstemp(path.data());

        if (-1 == fd)
            throw std::runtime_error(fmt::format("Cannot create temporary file {}: {}", path, errnoToString()));
    }

    ~TemporaryFile()
    {
        try
        {
            close();
            unlink();
        }
        catch (const std::runtime_error & e)
        {
            fmt::print(stderr, "{}", e.what());
        }
    }

    void close()
    {
        if (fd == -1)
            return;

        if (0 != ::close(fd))
            throw std::runtime_error(fmt::format("Cannot close temporary file {}: {}", path, errnoToString()));
        fd = -1;
    }

    void write(const std::string & data)
    {
        if (fd == -1)
            throw std::runtime_error(fmt::format("Cannot write to uninitialized file {}", path));

        writeRetry(fd, data);
    }

    void unlink()
    {
        if (0 != ::unlink(path.c_str()))
            throw std::runtime_error(fmt::format("Cannot remove temporary file {}: {}", path, errnoToString()));
    }

    std::string & getPath() { return path; }
};

/// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx.
/// Copyright (c) 2017-2018, Marcin Konarski (amok at codestation.org)
/// Copyright (c) 2010, Salvatore Sanfilippo (antirez at gmail dot com)
@@ -142,6 +305,7 @@ ReplxxLineReader::ReplxxLineReader(
    replxx::Replxx::highlighter_callback_t highlighter_)
    : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_)), highlighter(std::move(highlighter_))
    , editor(getEditor())
    , fuzzy_finder(getFuzzyFinder())
{
    using namespace std::placeholders;
    using Replxx = replxx::Replxx;
@ -249,6 +413,17 @@ ReplxxLineReader::ReplxxLineReader(
return rx.invoke(Replxx::ACTION::COMMIT_LINE, code);
};
rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action);
/// interactive search in history (requires fzf/sk)
if (!fuzzy_finder.empty())
{
auto interactive_history_search = [this](char32_t code)
{
openInteractiveHistorySearch();
return rx.invoke(Replxx::ACTION::REPAINT, code);
};
rx.bind_key(Replxx::KEY::control('R'), interactive_history_search);
}
}
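
getFuzzyFinder() is referenced above but its body is outside this hunk. A hypothetical sketch of such a lookup — scanning PATH for a skim ("sk") or fzf binary — is below; the function name, preference order, and return convention are assumptions for illustration, not the committed code:

#include <cstdlib>
#include <filesystem>
#include <string>

static std::string findFuzzyFinderSketch()
{
    const char * path_env = std::getenv("PATH"); // NOLINT(concurrency-mt-unsafe)
    if (!path_env)
        return {};
    std::string path = path_env;
    for (size_t pos = 0; pos <= path.size();)
    {
        size_t end = path.find(':', pos);
        if (end == std::string::npos)
            end = path.size();
        std::filesystem::path dir = path.substr(pos, end - pos);
        for (const char * candidate : {"sk", "fzf"})   /// assumed preference: skim, then fzf
        {
            std::error_code ec;
            if (std::filesystem::exists(dir / candidate, ec))
                return (dir / candidate).string();
        }
        pos = end + 1;
    }
    return {};   /// an empty result would leave the ctrl-R binding above disabled
}
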
ReplxxLineReader::~ReplxxLineReader()
@ -293,116 +468,70 @@ void ReplxxLineReader::addToHistory(const String & line)
rx.print("Unlock of history file failed: %s\n", errnoToString().c_str());
}
/// See comments in ShellCommand::executeImpl()
/// (for the vfork via dlsym())
int ReplxxLineReader::executeEditor(const std::string & path)
{
std::vector<char> argv0(editor.data(), editor.data() + editor.size() + 1);
std::vector<char> argv1(path.data(), path.data() + path.size() + 1);
char * const argv[] = {argv0.data(), argv1.data(), nullptr};
static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork");
if (!real_vfork)
{
rx.print("Cannot find symbol vfork in myself: %s\n", errnoToString().c_str());
return -1;
}
pid_t pid = reinterpret_cast<pid_t (*)()>(real_vfork)();
if (-1 == pid)
{
rx.print("Cannot vfork: %s\n", errnoToString().c_str());
return -1;
}
/// Child
if (0 == pid)
{
sigset_t mask;
sigemptyset(&mask);
sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) // ok in newly created process
sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) // ok in newly created process
execvp(editor.c_str(), argv);
rx.print("Cannot execute %s: %s\n", editor.c_str(), errnoToString().c_str());
_exit(-1);
}
int status = 0;
do
{
int exited_pid = waitpid(pid, &status, 0);
if (exited_pid == -1)
{
if (errno == EINTR)
continue;
rx.print("Cannot waitpid: %s\n", errnoToString().c_str());
return -1;
}
else
break;
} while (true);
return status;
}
/// Before this commit:
void ReplxxLineReader::openEditor()
{
char filename[] = "clickhouse_replxx_XXXXXX.sql";
int fd = ::mkstemps(filename, 4);
if (-1 == fd)
{
rx.print("Cannot create temporary file to edit query: %s\n", errnoToString().c_str());
return;
}
replxx::Replxx::State state(rx.get_state());
size_t bytes_written = 0;
const char * begin = state.text();
size_t offset = strlen(state.text());
while (bytes_written != offset)
{
ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written);
if ((-1 == res || 0 == res) && errno != EINTR)
{
rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString().c_str());
break;
}
bytes_written += res;
}
if (0 != ::close(fd))
{
rx.print("Cannot close temporary query file %s: %s\n", filename, errnoToString().c_str());
return;
}
if (0 == executeEditor(filename))
{
try
{
std::ifstream t(filename);
std::string str;
t.seekg(0, std::ios::end);
str.reserve(t.tellg());
t.seekg(0, std::ios::beg);
str.assign((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
rx.set_state(replxx::Replxx::State(str.c_str(), str.size()));
}
catch (...)
{
rx.print("Cannot read from temporary query file %s: %s\n", filename, errnoToString().c_str());
return;
}
}
if (bracketed_paste_enabled)
enableBracketedPaste();
if (0 != ::unlink(filename))
rx.print("Cannot remove temporary query file %s: %s\n", filename, errnoToString().c_str());
}
/// After this commit:
void ReplxxLineReader::openEditor()
{
TemporaryFile editor_file("clickhouse_client_editor_XXXXXX.sql");
editor_file.write(rx.get_state().text());
editor_file.close();
char * const argv[] = {editor.data(), editor_file.getPath().data(), nullptr};
try
{
if (executeCommand(argv) == 0)
{
const std::string & new_query = readFile(editor_file.getPath());
rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size()));
}
}
catch (const std::runtime_error & e)
{
rx.print(e.what());
}
if (bracketed_paste_enabled)
enableBracketedPaste();
}
void ReplxxLineReader::openInteractiveHistorySearch()
{
assert(!fuzzy_finder.empty());
TemporaryFile history_file("clickhouse_client_history_in_XXXXXX.bin");
auto hs(rx.history_scan());
while (hs.next())
{
history_file.write(hs.get().text());
history_file.write(std::string(1, '\0'));
}
history_file.close();
TemporaryFile output_file("clickhouse_client_history_out_XXXXXX.sql");
output_file.close();
char sh[] = "sh";
char sh_c[] = "-c";
/// NOTE: You can use one of the following to configure the behaviour additionally:
/// - SKIM_DEFAULT_OPTIONS
/// - FZF_DEFAULT_OPTS
std::string fuzzy_finder_command = fmt::format(
"{} --read0 --tac --no-sort --tiebreak=index --bind=ctrl-r:toggle-sort --height=30% < {} > {}",
fuzzy_finder, history_file.getPath(), output_file.getPath());
char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr};
try
{
if (executeCommand(argv) == 0)
{
const std::string & new_query = readFile(output_file.getPath());
rx.set_state(replxx::Replxx::State(new_query.c_str(), new_query.size()));
}
}
catch (const std::runtime_error & e)
{
rx.print(e.what());
}
if (bracketed_paste_enabled)
enableBracketedPaste();
}
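
The history file above is written as NUL-separated records (hence --read0 for fzf/sk), so multi-line queries survive as single entries. A small self-contained sketch of reading that framing back, independent of replxx, assuming the same '\0' delimiter:

#include <string>
#include <vector>

static std::vector<std::string> splitNulSeparated(const std::string & buf)
{
    std::vector<std::string> records;
    size_t pos = 0;
    while (pos < buf.size())
    {
        size_t zero = buf.find('\0', pos);
        if (zero == std::string::npos)
            zero = buf.size();
        records.emplace_back(buf, pos, zero - pos);   /// a record may itself contain '\n'
        pos = zero + 1;
    }
    return records;
}
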
void ReplxxLineReader::enableBracketedPaste()

View File

@ -27,6 +27,7 @@ private:
void addToHistory(const String & line) override;
int executeEditor(const std::string & path);
void openEditor();
void openInteractiveHistorySearch();
replxx::Replxx rx;
replxx::Replxx::highlighter_callback_t highlighter;
@ -36,4 +37,5 @@ private:
bool bracketed_paste_enabled = false;
std::string editor;
std::string fuzzy_finder;
};

View File

@ -1,12 +1,12 @@
# These variables are autochanged by release_lib.sh:
# These variables are autochanged by tests/ci/version_helper.py:
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54466)
SET(VERSION_REVISION 54467)
SET(VERSION_MAJOR 22)
SET(VERSION_MINOR 9)
SET(VERSION_MINOR 10)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 09a2ff88435f79e5279745bbe1dc0e5e401df38d)
SET(VERSION_DESCRIBE v22.9.1.1-testing)
SET(VERSION_STRING 22.9.1.1)
SET(VERSION_GITHASH 3030d4c7ff09ec44ab07d0a8069ea923227288a1)
SET(VERSION_DESCRIBE v22.10.1.1-testing)
SET(VERSION_STRING 22.10.1.1)
# end of autochange

View File

@ -11,49 +11,89 @@ cmake_push_check_state ()
# All of them are unrelated to the instruction set of the host machine
# (you can compile for a newer instruction set on old machines and vice versa).
option (ENABLE_SSSE3 "Use SSSE3 instructions on x86_64" 1)
option (ENABLE_SSE41 "Use SSE4.1 instructions on x86_64" 1)
option (ENABLE_SSE42 "Use SSE4.2 instructions on x86_64" 1)
option (ENABLE_PCLMULQDQ "Use pclmulqdq instructions on x86_64" 1)
option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1)
option (ENABLE_AVX "Use AVX instructions on x86_64" 0)
option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0)
option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0)
option (ENABLE_AVX512_VBMI "Use AVX512_VBMI instruction on x86_64 (depends on ENABLE_AVX512)" 0)
option (ENABLE_BMI "Use BMI instructions on x86_64" 0)
option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0)
option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0)
# X86: Allow compilation for an SSE2-only target machine. Done by a special build in CI for embedded or very old hardware.
option (NO_SSE3_OR_HIGHER "Disable SSE3 or higher on x86_64" 0)
if (NO_SSE3_OR_HIGHER)
SET(ENABLE_SSSE3 0)
SET(ENABLE_SSE41 0)
SET(ENABLE_SSE42 0)
SET(ENABLE_PCLMULQDQ 0)
SET(ENABLE_POPCNT 0)
SET(ENABLE_AVX 0)
SET(ENABLE_AVX2 0)
SET(ENABLE_AVX512 0)
SET(ENABLE_AVX512_VBMI 0)
SET(ENABLE_BMI 0)
SET(ENABLE_AVX2_FOR_SPEC_OP 0)
SET(ENABLE_AVX512_FOR_SPEC_OP 0)
endif()
option (ARCH_NATIVE "Add -march=native compiler flag. This makes your binaries non-portable, but more performant code may be generated. This option overrides the ENABLE_* options for specific instruction sets. Its use is highly discouraged." 0)
if (ARCH_NATIVE)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native")
elseif (ARCH_AARCH64)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8-a+crc+simd+crypto+dotprod+ssbs")
# ARM publishes a new revision of its ISA almost every year [1]. Each revision comes with new mandatory and optional features from
# which CPU vendors can pick and choose. This creates a lot of variability ... We provide two build "profiles": one for maximum
# compatibility, intended to run on all 64-bit ARM hardware released after 2013 (e.g. Raspberry Pi 4), and one for modern ARM server
# CPUs (e.g. Graviton).
#
# [1] https://en.wikipedia.org/wiki/AArch64
option (NO_ARMV81_OR_HIGHER "Disable ARMv8.1 or higher on Aarch64 for maximum compatibility with older/embedded hardware." 0)
if (NO_ARMV81_OR_HIGHER)
# crc32 is optional in v8.0 and mandatory in v8.1. Enable it, as __crc32()* is used in lots of places and even very old ARM CPUs
# support it.
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8+crc")
else ()
# ARMv8.2 is quite ancient but the lowest common denominator supported by both Graviton 2 and 3 processors [1]. In particular, it
# includes LSE (made mandatory with ARMv8.1) which provides nice speedups without having to fall back to compat flag
# "-moutline-atomics" for v8.0 [2, 3, 4] that requires a recent glibc with runtime dispatch helper, limiting our ability to run on
# old OSs.
#
# simd: NEON, introduced as optional in v8.0. A few extensions were added with v8.1, but it is still not mandatory. Enables the
# compiler to auto-vectorize.
# sve: Scalable Vector Extensions, introduced as optional in v8.2. Available in Graviton 3 but not in Graviton 2, and most likely
# also not in CI machines. Compiler support for autovectorization is rudimentary at the time of writing, see [5]. Can be
# enabled one-fine-day (TM) but not now.
# ssbs: "Speculative Store Bypass Safe". Optional in v8.0, mandatory in v8.5. Meltdown/Spectre countermeasure.
# crypto: SHA1, SHA256, AES. Optional in v8.0. In v8.4, further algorithms were added, but it is still optional, see [6].
# dotprod: Scalar vector product (SDOT and UDOT instructions). Probably the most obscure extra flag, with doubtful performance benefits,
# but it has always been enabled, so why not keep it. It is not 100% clear in which revision this flag was
# introduced as optional, either in v8.2 [7] or in v8.4 [8].
# ldapr: Load-Acquire RCpc Register. Better support for release/acquire semantics of atomics. Good for allocators and high-contention
# code. Optional in v8.2, mandatory in v8.3 [9]. Supported on Graviton 2+, Azure and GCP instances. Generated by clang 15 and newer.
#
# [1] https://github.com/aws/aws-graviton-getting-started/blob/main/c-c%2B%2B.md
# [2] https://community.arm.com/arm-community-blogs/b/tools-software-ides-blog/posts/making-the-most-of-the-arm-architecture-in-gcc-10
# [3] https://mysqlonarm.github.io/ARM-LSE-and-MySQL/
# [4] https://dev.to/aws-builders/large-system-extensions-for-aws-graviton-processors-3eci
# [5] https://developer.arm.com/tools-and-software/open-source-software/developer-tools/llvm-toolchain/sve-support
# [6] https://developer.arm.com/documentation/100067/0612/armclang-Command-line-Options/-mcpu?lang=en
# [7] https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html
# [8] https://developer.arm.com/documentation/102651/a/What-are-dot-product-intructions-
# [9] https://developer.arm.com/documentation/dui0801/g/A64-Data-Transfer-Instructions/LDAPR?lang=en
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8.2-a+simd+crypto+dotprod+ssbs -Xclang=-target-feature -Xclang=+ldapr -Wno-unused-command-line-argument")
endif ()
elseif (ARCH_PPC64LE)
# Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC
set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS")
elseif (ARCH_AMD64)
option (ENABLE_SSSE3 "Use SSSE3 instructions on x86_64" 1)
option (ENABLE_SSE41 "Use SSE4.1 instructions on x86_64" 1)
option (ENABLE_SSE42 "Use SSE4.2 instructions on x86_64" 1)
option (ENABLE_PCLMULQDQ "Use pclmulqdq instructions on x86_64" 1)
option (ENABLE_POPCNT "Use popcnt instructions on x86_64" 1)
option (ENABLE_AVX "Use AVX instructions on x86_64" 0)
option (ENABLE_AVX2 "Use AVX2 instructions on x86_64" 0)
option (ENABLE_AVX512 "Use AVX512 instructions on x86_64" 0)
option (ENABLE_AVX512_VBMI "Use AVX512_VBMI instruction on x86_64 (depends on ENABLE_AVX512)" 0)
option (ENABLE_BMI "Use BMI instructions on x86_64" 0)
option (ENABLE_AVX2_FOR_SPEC_OP "Use avx2 instructions for specific operations on x86_64" 0)
option (ENABLE_AVX512_FOR_SPEC_OP "Use avx512 instructions for specific operations on x86_64" 0)
option (NO_SSE3_OR_HIGHER "Disable SSE3 or higher on x86_64 for maximum compatibility with older/embedded hardware." 0)
if (NO_SSE3_OR_HIGHER)
SET(ENABLE_SSSE3 0)
SET(ENABLE_SSE41 0)
SET(ENABLE_SSE42 0)
SET(ENABLE_PCLMULQDQ 0)
SET(ENABLE_POPCNT 0)
SET(ENABLE_AVX 0)
SET(ENABLE_AVX2 0)
SET(ENABLE_AVX512 0)
SET(ENABLE_AVX512_VBMI 0)
SET(ENABLE_BMI 0)
SET(ENABLE_AVX2_FOR_SPEC_OP 0)
SET(ENABLE_AVX512_FOR_SPEC_OP 0)
endif()
set (TEST_FLAG "-mssse3")
set (CMAKE_REQUIRED_FLAGS "${TEST_FLAG} -O0")
check_cxx_source_compiles("
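
As a cross-check of the -march choices above, compilers expose ACLE feature-test macros that mirror the enabled extensions; a small probe (the macro names are standard ACLE, the program itself is merely illustrative):

#include <cstdio>

int main()
{
#if defined(__aarch64__)
#    if defined(__ARM_FEATURE_CRC32)
    std::puts("crc32: on");     /// expected for both the armv8+crc and armv8.2-a builds
#    endif
#    if defined(__ARM_FEATURE_DOTPROD)
    std::puts("dotprod: on");   /// expected only for the non-compat armv8.2-a build
#    endif
#    if defined(__ARM_FEATURE_SVE)
    std::puts("sve: on");       /// not expected: sve is deliberately left out above
#    endif
#endif
    return 0;
}
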

cmake/git.cmake Normal file
View File

@ -0,0 +1,42 @@
find_package(Git)
# Make basic Git information available as variables. Such data will later be embedded into the build, e.g. for the SYSTEM.BUILD_OPTIONS view.
if (Git_FOUND)
# Commit hash + whether the building workspace was dirty or not
execute_process(COMMAND
"${GIT_EXECUTABLE}" rev-parse HEAD
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_HASH
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
# Branch name
execute_process(COMMAND
"${GIT_EXECUTABLE}" rev-parse --abbrev-ref HEAD
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_BRANCH
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
# Date of the commit
SET(ENV{TZ} "UTC")
execute_process(COMMAND
"${GIT_EXECUTABLE}" log -1 --format=%ad --date=iso-local
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_DATE
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
# Subject of the commit
execute_process(COMMAND
"${GIT_EXECUTABLE}" log -1 --format=%s
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
message(STATUS "Git HEAD commit hash: ${GIT_HASH}")
execute_process(
COMMAND ${GIT_EXECUTABLE} status
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE)
else()
message(STATUS "Git could not be found.")
endif()

View File

@ -1,22 +0,0 @@
# Print the status of the git repository (if git is available).
# This is useful for troubleshooting build failure reports
find_package(Git)
if (Git_FOUND)
execute_process(
COMMAND ${GIT_EXECUTABLE} rev-parse HEAD
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
OUTPUT_VARIABLE GIT_COMMIT_ID
OUTPUT_STRIP_TRAILING_WHITESPACE)
message(STATUS "HEAD's commit hash ${GIT_COMMIT_ID}")
execute_process(
COMMAND ${GIT_EXECUTABLE} status
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} OUTPUT_STRIP_TRAILING_WHITESPACE)
else()
message(STATUS "Git could not be found.")
endif()

View File

@ -51,6 +51,8 @@ if (CMAKE_CROSSCOMPILING)
set (ENABLE_GRPC OFF CACHE INTERNAL "")
set (ENABLE_HDFS OFF CACHE INTERNAL "")
set (ENABLE_EMBEDDED_COMPILER OFF CACHE INTERNAL "")
# use of drand48_data
set (ENABLE_AZURE_BLOB_STORAGE OFF CACHE INTERNAL "")
endif ()
# Don't know why but CXX_STANDARD doesn't work for cross-compilation

View File

@ -24,7 +24,10 @@ if (COMPILER_CLANG)
no_warning(c++98-compat-pedantic)
no_warning(c++98-compat)
no_warning(c++20-compat) # Use constinit in C++20 without warnings
no_warning(conversion)
no_warning(sign-conversion)
no_warning(implicit-int-conversion)
no_warning(implicit-int-float-conversion)
no_warning(shorten-64-to-32)
no_warning(ctad-maybe-unsupported) # clang 9+, linux-only
no_warning(disabled-macro-expansion)
no_warning(documentation-unknown-command)
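
For reference, tiny examples of code that trips the conversion warnings being disabled above under clang's -Weverything; the snippet is purely illustrative:

#include <cstdint>

void warning_examples(int64_t wide, int32_t value)
{
    uint64_t u = value;      /// -Wsign-conversion: value may be negative
    int32_t narrow = wide;   /// -Wshorten-64-to-32 (and -Wconversion): may truncate
    float f = value;         /// -Wimplicit-int-float-conversion: float cannot represent all int32_t values
    (void)u; (void)narrow; (void)f;
}
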

View File

@ -107,7 +107,7 @@ if (ENABLE_TESTS)
add_contrib (googletest-cmake googletest)
endif()
add_contrib (llvm-cmake llvm)
add_contrib (llvm-project-cmake llvm-project)
add_contrib (libxml2-cmake libxml2)
add_contrib (aws-s3-cmake
aws
@ -159,6 +159,8 @@ add_contrib (s2geometry-cmake s2geometry)
add_contrib (c-ares-cmake c-ares)
add_contrib (qpl-cmake qpl)
add_contrib(annoy-cmake annoy)
# Put all targets defined here and in subdirectories under "contrib/<immediate-subdir>" folders in GUI-based IDEs.
# Some third-party projects may override the CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
# in "contrib/..." as originally planned; we work around this by fixing the FOLDER properties of all targets manually,

contrib/annoy vendored Submodule

@ -0,0 +1 @@
Subproject commit f2ac8e7b48f9a9cf676d3b58286e5455aba8e956

View File

@ -0,0 +1,24 @@
option(ENABLE_ANNOY "Enable Annoy index support" ${ENABLE_LIBRARIES})
# The Annoy index should be disabled with the undefined-behavior sanitizer: because of memory storage optimizations
# (https://github.com/ClickHouse/annoy/blob/9d8a603a4cd252448589e84c9846f94368d5a289/src/annoylib.h#L442-L463)
# UBSan fails and crashes. A similar issue is already open in the Annoy repo:
# https://github.com/spotify/annoy/issues/456
# Problems with alignment can lead to errors like
# (https://stackoverflow.com/questions/46790550/c-undefined-behavior-strict-aliasing-rule-or-incorrect-alignment)
# or to a crash on ARM: https://developer.arm.com/documentation/ka003038/latest
# These issues should be resolved before Annoy becomes non-experimental (--> setting "allow_experimental_annoy_index")
if ((NOT ENABLE_ANNOY) OR (SANITIZE STREQUAL "undefined") OR (ARCH_AARCH64))
message (STATUS "Not using annoy")
return()
endif()
set(ANNOY_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/annoy")
set(ANNOY_SOURCE_DIR "${ANNOY_PROJECT_DIR}/src")
add_library(_annoy INTERFACE)
target_include_directories(_annoy SYSTEM INTERFACE ${ANNOY_SOURCE_DIR})
add_library(ch_contrib::annoy ALIAS _annoy)
target_compile_definitions(_annoy INTERFACE ENABLE_ANNOY)
target_compile_definitions(_annoy INTERFACE ANNOYLIB_MULTITHREADED_BUILD)
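
Code linked against ch_contrib::annoy inherits both INTERFACE definitions set above, so availability can be checked at compile time; a hypothetical consumer sketch (annoylib.h is the header shipped in contrib/annoy/src; actual usage inside ClickHouse is outside this diff):

#if defined(ENABLE_ANNOY)
#    include <annoylib.h>   /// ANNOYLIB_MULTITHREADED_BUILD selects the thread-safe build policy
#endif

bool annoySupported()
{
#if defined(ENABLE_ANNOY)
    return true;
#else
    return false;
#endif
}
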

View File

@ -1,4 +1,4 @@
if(ARCH_AMD64 OR ARCH_AARCH64)
if(ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE)
option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES})
elseif(ENABLE_BASE64)
message (${RECONFIGURE_MESSAGE_LEVEL} "base64 library is only supported on x86_64 and aarch64")
@ -26,7 +26,11 @@ if (ARCH_AMD64)
target_compile_options(_base64_avx PRIVATE -falign-loops -mavx)
target_compile_options(_base64_avx2 PRIVATE -falign-loops -mavx2)
else ()
target_compile_options(_base64_ssse3 PRIVATE -falign-loops)
if (ARCH_PPC64LE)
target_compile_options(_base64_ssse3 PRIVATE -D__SSSE3__ -falign-loops)
else()
target_compile_options(_base64_ssse3 PRIVATE -falign-loops)
endif()
endif ()
if (ARCH_AMD64)

contrib/llvm vendored

@ -1 +0,0 @@
Subproject commit 0db5bf5bd2452cd8f1283a1fcdc04845af705bfc

View File

@ -1,112 +0,0 @@
if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined")
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
else()
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
endif()
option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})
if (NOT ENABLE_EMBEDDED_COMPILER)
message(STATUS "Not using LLVM")
return()
endif()
set (LLVM_FOUND 1)
set (LLVM_VERSION "12.0.0bundled")
set (LLVM_INCLUDE_DIRS
"${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm/include"
"${ClickHouse_BINARY_DIR}/contrib/llvm/llvm/include"
)
set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm")
# This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles.
set (REQUIRED_LLVM_LIBRARIES
LLVMExecutionEngine
LLVMRuntimeDyld
LLVMAsmPrinter
LLVMDebugInfoDWARF
LLVMGlobalISel
LLVMSelectionDAG
LLVMMCDisassembler
LLVMPasses
LLVMCodeGen
LLVMipo
LLVMBitWriter
LLVMInstrumentation
LLVMScalarOpts
LLVMAggressiveInstCombine
LLVMInstCombine
LLVMVectorize
LLVMTransformUtils
LLVMTarget
LLVMAnalysis
LLVMProfileData
LLVMObject
LLVMBitReader
LLVMCore
LLVMRemarks
LLVMBitstreamReader
LLVMMCParser
LLVMMC
LLVMBinaryFormat
LLVMDebugInfoCodeView
LLVMSupport
LLVMDemangle
)
if (ARCH_AMD64)
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen)
elseif (ARCH_AARCH64)
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen)
endif ()
#function(llvm_libs_all REQUIRED_LLVM_LIBRARIES)
# llvm_map_components_to_libnames (result all)
# if (USE_STATIC_LIBRARIES OR NOT "LLVM" IN_LIST result)
# list (REMOVE_ITEM result "LTO" "LLVM")
# else()
# set (result "LLVM")
# endif ()
# list (APPEND result ${CMAKE_DL_LIBS} ch_contrib::zlib)
# set (${REQUIRED_LLVM_LIBRARIES} ${result} PARENT_SCOPE)
#endfunction()
message (STATUS "LLVM include Directory: ${LLVM_INCLUDE_DIRS}")
message (STATUS "LLVM library Directory: ${LLVM_LIBRARY_DIRS}")
message (STATUS "LLVM C++ compiler flags: ${LLVM_CXXFLAGS}")
# ld: unknown option: --color-diagnostics
set (LINKER_SUPPORTS_COLOR_DIAGNOSTICS 0 CACHE INTERNAL "")
# Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind
set (CMAKE_INSTALL_RPATH "ON")
set (LLVM_COMPILER_CHECKED 1 CACHE INTERNAL "")
set (LLVM_ENABLE_EH 1 CACHE INTERNAL "")
set (LLVM_ENABLE_RTTI 1 CACHE INTERNAL "")
set (LLVM_ENABLE_PIC 0 CACHE INTERNAL "")
set (LLVM_TARGETS_TO_BUILD "X86;AArch64" CACHE STRING "")
# Need to use C++17 since the compilation is not possible with C++20 currently, due to ambiguous operator != etc.
# LLVM project will set its default value for the -std=... but our global setting from CMake will override it.
set (CMAKE_CXX_STANDARD 17)
set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm/llvm")
set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm")
add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}")
set_directory_properties (PROPERTIES
# due to the llvm cross-compile, cmake does not know how to clean it, and a clean
# will lead to the following error:
#
# ninja: error: remove(contrib/llvm/llvm/NATIVE): Directory not empty
#
ADDITIONAL_CLEAN_FILES "${LLVM_BINARY_DIR}"
# llvm's cmake configures this file only when cmake runs,
# and after a clean, cmake will not know that it should re-run,
# so add an explicit dependency on llvm-config.h
CMAKE_CONFIGURE_DEPENDS "${LLVM_BINARY_DIR}/include/llvm/Config/llvm-config.h"
)
add_library (_llvm INTERFACE)
target_link_libraries (_llvm INTERFACE ${REQUIRED_LLVM_LIBRARIES})
target_include_directories (_llvm SYSTEM BEFORE INTERFACE ${LLVM_INCLUDE_DIRS})
add_library(ch_contrib::llvm ALIAS _llvm)

contrib/llvm-project vendored Submodule

@ -0,0 +1 @@
Subproject commit dc972a767ff2e9488d96cb2a6e67de160fbe15a7

View File

@ -0,0 +1,122 @@
if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined" OR NOT USE_STATIC_LIBRARIES)
set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF)
else()
set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON)
endif()
option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT})
if (NOT ENABLE_EMBEDDED_COMPILER)
message(STATUS "Not using LLVM")
return()
endif()
# TODO: Enable shared library build
# TODO: Enable compilation on AArch64
set (LLVM_VERSION "14.0.0bundled")
set (LLVM_INCLUDE_DIRS
"${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm/include"
"${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm/include"
)
set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm")
# This list was generated by listing all LLVM libraries, compiling the binary and removing all libraries while it still compiles.
set (REQUIRED_LLVM_LIBRARIES
LLVMExecutionEngine
LLVMRuntimeDyld
LLVMAsmPrinter
LLVMDebugInfoDWARF
LLVMGlobalISel
LLVMSelectionDAG
LLVMMCDisassembler
LLVMPasses
LLVMCodeGen
LLVMipo
LLVMBitWriter
LLVMInstrumentation
LLVMScalarOpts
LLVMAggressiveInstCombine
LLVMInstCombine
LLVMVectorize
LLVMTransformUtils
LLVMTarget
LLVMAnalysis
LLVMProfileData
LLVMObject
LLVMBitReader
LLVMCore
LLVMRemarks
LLVMBitstreamReader
LLVMMCParser
LLVMMC
LLVMBinaryFormat
LLVMDebugInfoCodeView
LLVMSupport
LLVMDemangle
)
# if (ARCH_AMD64)
list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen)
# elseif (ARCH_AARCH64)
# list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen)
# endif ()
# ld: unknown option: --color-diagnostics
# set (LINKER_SUPPORTS_COLOR_DIAGNOSTICS 0 CACHE INTERNAL "")
set (CMAKE_INSTALL_RPATH "ON") # Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind
set (LLVM_COMPILER_CHECKED 1 CACHE INTERNAL "") # Skip internal compiler selection
set (LLVM_ENABLE_EH 1 CACHE INTERNAL "") # With exception handling
set (LLVM_ENABLE_RTTI 1 CACHE INTERNAL "")
set (LLVM_ENABLE_PIC 0 CACHE INTERNAL "")
set (LLVM_TARGETS_TO_BUILD "X86" CACHE STRING "") # for x86 + ARM: "X86;AArch64"
# Omit unnecessary stuff (just the options which are ON by default)
set(LLVM_ENABLE_BACKTRACES 0 CACHE INTERNAL "")
set(LLVM_ENABLE_CRASH_OVERRIDES 0 CACHE INTERNAL "")
set(LLVM_ENABLE_TERMINFO 0 CACHE INTERNAL "")
set(LLVM_ENABLE_LIBXML2 0 CACHE INTERNAL "")
set(LLVM_ENABLE_LIBEDIT 0 CACHE INTERNAL "")
set(LLVM_ENABLE_LIBPFM 0 CACHE INTERNAL "")
set(LLVM_ENABLE_ZLIB 0 CACHE INTERNAL "")
set(LLVM_ENABLE_Z3_SOLVER 0 CACHE INTERNAL "")
set(LLVM_INCLUDE_TOOLS 0 CACHE INTERNAL "")
set(LLVM_BUILD_TOOLS 0 CACHE INTERNAL "")
set(LLVM_INCLUDE_UTILS 0 CACHE INTERNAL "")
set(LLVM_BUILD_UTILS 0 CACHE INTERNAL "")
set(LLVM_INCLUDE_RUNTIMES 0 CACHE INTERNAL "")
set(LLVM_BUILD_RUNTIMES 0 CACHE INTERNAL "")
set(LLVM_BUILD_RUNTIME 0 CACHE INTERNAL "")
set(LLVM_INCLUDE_EXAMPLES 0 CACHE INTERNAL "")
set(LLVM_INCLUDE_TESTS 0 CACHE INTERNAL "")
set(LLVM_INCLUDE_GO_TESTS 0 CACHE INTERNAL "")
set(LLVM_INCLUDE_BENCHMARKS 0 CACHE INTERNAL "")
set(LLVM_INCLUDE_DOCS 0 CACHE INTERNAL "")
set(LLVM_ENABLE_OCAMLDOC 0 CACHE INTERNAL "")
set(LLVM_ENABLE_BINDINGS 0 CACHE INTERNAL "")
# C++20 is currently not supported due to ambiguous operator != etc.
set (CMAKE_CXX_STANDARD 17)
set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm")
set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm")
add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}")
set_directory_properties (PROPERTIES
# due to the llvm cross-compile, cmake does not know how to clean it, and a clean
# will lead to the following error:
#
# ninja: error: remove(contrib/llvm/llvm/NATIVE): Directory not empty
#
ADDITIONAL_CLEAN_FILES "${LLVM_BINARY_DIR}"
# llvm's cmake configures this file only when cmake runs,
# and after a clean, cmake will not know that it should re-run,
# so add an explicit dependency on llvm-config.h
CMAKE_CONFIGURE_DEPENDS "${LLVM_BINARY_DIR}/include/llvm/Config/llvm-config.h"
)
add_library (_llvm INTERFACE)
target_link_libraries (_llvm INTERFACE ${REQUIRED_LLVM_LIBRARIES})
target_include_directories (_llvm SYSTEM BEFORE INTERFACE ${LLVM_INCLUDE_DIRS})
add_library(ch_contrib::llvm ALIAS _llvm)

contrib/openldap vendored

@ -1 +1 @@
Subproject commit 0208811b6043ca06fda8631a5e473df1ec515ccb
Subproject commit 8688afe6bc95ebcd20edf4578c536362218cb70a

contrib/poco vendored

@ -1 +1 @@
Subproject commit 9fec8e11dbb6a352e1cfba8cc9e23ebd7fb77310
Subproject commit 76746b35d0e254eaaba71dc3b79e46cba8cbb144

View File

@ -128,6 +128,7 @@ def parse_env_variables(
DARWIN_SUFFIX = "-darwin"
DARWIN_ARM_SUFFIX = "-darwin-aarch64"
ARM_SUFFIX = "-aarch64"
ARM_V80COMPAT_SUFFIX = "-aarch64-v80compat"
FREEBSD_SUFFIX = "-freebsd"
PPC_SUFFIX = "-ppc64le"
AMD64_SSE2_SUFFIX = "-amd64sse2"
@ -140,6 +141,7 @@ def parse_env_variables(
is_cross_darwin = compiler.endswith(DARWIN_SUFFIX)
is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX)
is_cross_arm = compiler.endswith(ARM_SUFFIX)
is_cross_arm_v80compat = compiler.endswith(ARM_V80COMPAT_SUFFIX)
is_cross_ppc = compiler.endswith(PPC_SUFFIX)
is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX)
is_amd64_sse2 = compiler.endswith(AMD64_SSE2_SUFFIX)
@ -178,6 +180,13 @@ def parse_env_variables(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake"
)
result.append("DEB_ARCH=arm64")
elif is_cross_arm_v80compat:
cc = compiler[: -len(ARM_V80COMPAT_SUFFIX)]
cmake_flags.append(
"-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-aarch64.cmake"
)
cmake_flags.append("-DNO_ARMV81_OR_HIGHER=1")
result.append("DEB_ARCH=arm64")
elif is_cross_freebsd:
cc = compiler[: -len(FREEBSD_SUFFIX)]
cmake_flags.append(
@ -343,6 +352,7 @@ if __name__ == "__main__":
"clang-15-darwin",
"clang-15-darwin-aarch64",
"clang-15-aarch64",
"clang-15-aarch64-v80compat",
"clang-15-ppc64le",
"clang-15-amd64sse2",
"clang-15-freebsd",

View File

@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="22.8.5.29"
ARG VERSION="22.9.3.18"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# user/group precreated explicitly with fixed uid/gid on purpose.

View File

@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="22.8.5.29"
ARG VERSION="22.9.3.18"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -106,8 +106,8 @@ fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)"
HTTPS_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=https_port)"
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port --try)"
HTTPS_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=https_port --try)"
if [ -n "$HTTP_PORT" ]; then
URL="http://127.0.0.1:$HTTP_PORT/ping"

View File

@ -157,7 +157,6 @@ function run_cmake
"-DUSE_UNWIND=1"
"-DENABLE_NURAFT=1"
"-DENABLE_JEMALLOC=1"
"-DENABLE_REPLXX=1"
)
export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache"

View File

@ -13,25 +13,28 @@ sysctl kernel.core_pattern='core.%e.%p-%P'
# Thread Fuzzer allows checking more permutations of possible thread scheduling
# and finding more potential issues.
# Temporarily disable ThreadFuzzer with tsan because of https://github.com/google/sanitizers/issues/1540
is_tsan_build=$(clickhouse local -q "select value like '% -fsanitize=thread %' from system.build_options where name='CXX_FLAGS'")
if [ "$is_tsan_build" -eq "0" ]; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
fi
function install_packages()
@ -243,7 +246,7 @@ export USE_S3_STORAGE_FOR_MERGE_TREE=1
configure
# But we still need the default disk because some tables are loaded only into it
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml | sed "s|<disk>s3</disk>|<disk>s3</disk><disk>default</disk>|" > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
@ -338,6 +341,12 @@ echo $previous_release_tag | download_release_packets && echo -e 'Download scrip
|| echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log
for table in query_log trace_log
do
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.tsv.gz ||:
done
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
# Check if we cloned previous release repository successfully
if ! [ "$(ls -A previous_release_repository/tests/queries)" ]
@ -454,6 +463,7 @@ else
-e "This engine is deprecated and is not supported in transactions" \
-e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \
-e "The set of parts restored in place of" \
-e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \
/var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
&& echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
@ -496,6 +506,12 @@ else
# Remove file bc_check_fatal_messages.txt if it's empty
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||:
for table in query_log trace_log
do
clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | pigz > /test_output/$table.backward.tsv.gz ||:
done
fi
dmesg -T > /test_output/dmesg.log
@ -505,17 +521,8 @@ grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e
&& echo -e 'OOM in dmesg\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'No OOM in dmesg\tOK' >> /test_output/test_results.tsv
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
mv /var/log/clickhouse-server/stderr.log /test_output/
# Replace the engine with Ordinary to avoid extra symlinks stuff in artifacts.
# (so that clickhouse-local --path can read it w/o extra care).
sed -i -e "s/ATTACH DATABASE _ UUID '[^']*'/ATTACH DATABASE system/" -e "s/Atomic/Ordinary/" /var/lib/clickhouse/metadata/system.sql
for table in query_log trace_log; do
sed -i "s/ATTACH TABLE _ UUID '[^']*'/ATTACH TABLE $table/" /var/lib/clickhouse/metadata/system/${table}.sql
tar -chf /test_output/${table}_dump.tar /var/lib/clickhouse/metadata/system.sql /var/lib/clickhouse/metadata/system/${table}.sql /var/lib/clickhouse/data/system/${table} ||:
done
# Write check result into check_status.tsv
clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv

View File

@ -12,7 +12,16 @@ then
DIR="amd64"
elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ]
then
DIR="aarch64"
# If the system has >=ARMv8.2 (https://en.wikipedia.org/wiki/AArch64), choose the corresponding build, else fall back to a v8.0
# compat build. Unfortunately, the ARM ISA level cannot be read directly; we need to guess from the "features" in /proc/cpuinfo.
# Also, the flags in /proc/cpuinfo are named differently than the flags passed to the compiler (cmake/cpu_features.cmake).
ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/ && /lrcpc/')
if [ "${ARMV82}" ]
then
DIR="aarch64"
else
DIR="aarch64v80compat"
fi
elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ]
then
DIR="powerpc64le"
@ -22,12 +31,6 @@ then
if [ "${ARCH}" = "x86_64" -o "${ARCH}" = "amd64" ]
then
DIR="freebsd"
elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ]
then
DIR="freebsd-aarch64"
elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ]
then
DIR="freebsd-powerpc64le"
fi
elif [ "${OS}" = "Darwin" ]
then
@ -42,7 +45,7 @@ fi
if [ -z "${DIR}" ]
then
echo "The '${OS}' operating system with the '${ARCH}' architecture is not supported."
echo "Operating system '${OS}' / architecture '${ARCH}' is unsupported."
exit 1
fi
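
The same ">= ARMv8.2" guess that the script above makes from /proc/cpuinfo can be expressed in C++; a minimal sketch under the same assumption that the asimd/sha1/aes/atomics/lrcpc flags together approximate an ARMv8.2 machine:

#include <fstream>
#include <string>
#include <iostream>

static bool looksLikeArmV82()
{
    std::ifstream cpuinfo("/proc/cpuinfo");
    std::string line;
    while (std::getline(cpuinfo, line))
    {
        if (line.rfind("Features", 0) == 0)   /// first "Features" line, like `grep -m 1`
        {
            for (const char * flag : {"asimd", "sha1", "aes", "atomics", "lrcpc"})
                if (line.find(flag) == std::string::npos)
                    return false;
            return true;
        }
    }
    return false;
}

int main()
{
    std::cout << (looksLikeArmV82() ? "aarch64" : "aarch64v80compat") << '\n';
}
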

View File

@ -0,0 +1,56 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.3.13.80-lts (e2708b01fba) FIXME as compared to v22.3.12.19-lts (4a08f8a073b)
#### New Feature
* Backported in [#41264](https://github.com/ClickHouse/ClickHouse/issues/41264): Implemented automatic conversion of database engine from `Ordinary` to `Atomic`. Create empty `convert_ordinary_to_atomic` file in `flags` directory and all `Ordinary` databases will be converted automatically on next server start. Resolves [#39546](https://github.com/ClickHouse/ClickHouse/issues/39546). [#39933](https://github.com/ClickHouse/ClickHouse/pull/39933) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#40886](https://github.com/ClickHouse/ClickHouse/issues/40886): Add setting to disable limit on kafka_num_consumers. Closes [#40331](https://github.com/ClickHouse/ClickHouse/issues/40331). [#40670](https://github.com/ClickHouse/ClickHouse/pull/40670) ([Kruglov Pavel](https://github.com/Avogar)).
#### Bug Fix
* Backported in [#41273](https://github.com/ClickHouse/ClickHouse/issues/41273): Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Build/Testing/Packaging Improvement
* Backported in [#41557](https://github.com/ClickHouse/ClickHouse/issues/41557): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#40745](https://github.com/ClickHouse/ClickHouse/issues/40745): Fix cast lowcard of nullable in JoinSwitcher, close [#37385](https://github.com/ClickHouse/ClickHouse/issues/37385). [#37453](https://github.com/ClickHouse/ClickHouse/pull/37453) ([Vladimir C](https://github.com/vdimir)).
* Backported in [#41812](https://github.com/ClickHouse/ClickHouse/issues/41812): Update `simdjson`. This fixes [#38621](https://github.com/ClickHouse/ClickHouse/issues/38621). [#38838](https://github.com/ClickHouse/ClickHouse/pull/38838) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41320](https://github.com/ClickHouse/ClickHouse/issues/41320): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)).
* Backported in [#40904](https://github.com/ClickHouse/ClickHouse/issues/40904): Fix potential deadlock in WriteBufferFromS3 during task scheduling failure. [#40070](https://github.com/ClickHouse/ClickHouse/pull/40070) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#41916](https://github.com/ClickHouse/ClickHouse/issues/41916): Fix rare bug with column TTL for MergeTree engines family: In case of repeated vertical merge the error `Cannot unlink file ColumnName.bin ... No such file or directory.` could happen. [#40346](https://github.com/ClickHouse/ClickHouse/pull/40346) ([alesapin](https://github.com/alesapin)).
* Backported in [#40903](https://github.com/ClickHouse/ClickHouse/issues/40903): Proxy resolver stop on first successful request to endpoint. [#40353](https://github.com/ClickHouse/ClickHouse/pull/40353) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#40663](https://github.com/ClickHouse/ClickHouse/issues/40663): Fix potential data loss due to a bug in the AWS SDK (https://github.com/aws/aws-sdk-cpp/issues/658). The bug can be triggered only when clickhouse is used over S3. [#40506](https://github.com/ClickHouse/ClickHouse/pull/40506) ([alesapin](https://github.com/alesapin)).
* Backported in [#40901](https://github.com/ClickHouse/ClickHouse/issues/40901): Fix memory leak while pushing to MVs w/o query context (from Kafka/...). [#40732](https://github.com/ClickHouse/ClickHouse/pull/40732) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#41637](https://github.com/ClickHouse/ClickHouse/issues/41637): Fix possible segfaults, heap-use-after-free and memory leaks in aggregate function combinators. Closes [#40848](https://github.com/ClickHouse/ClickHouse/issues/40848). [#41083](https://github.com/ClickHouse/ClickHouse/pull/41083) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#41664](https://github.com/ClickHouse/ClickHouse/issues/41664): Queries with `OFFSET` clause in subquery and `WHERE` clause in outer query might return incorrect result, it's fixed. Fixes [#40416](https://github.com/ClickHouse/ClickHouse/issues/40416). [#41280](https://github.com/ClickHouse/ClickHouse/pull/41280) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#41356](https://github.com/ClickHouse/ClickHouse/issues/41356): Add column type check before UUID insertion in MsgPack format. [#41309](https://github.com/ClickHouse/ClickHouse/pull/41309) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#41807](https://github.com/ClickHouse/ClickHouse/issues/41807): Malicious data in Native format might cause a crash. [#41441](https://github.com/ClickHouse/ClickHouse/pull/41441) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41804](https://github.com/ClickHouse/ClickHouse/issues/41804): The aggregate function `categorialInformationValue` had incorrectly defined properties, which might cause a null pointer dereference at runtime. This closes [#41443](https://github.com/ClickHouse/ClickHouse/issues/41443). [#41449](https://github.com/ClickHouse/ClickHouse/pull/41449) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41503](https://github.com/ClickHouse/ClickHouse/issues/41503): Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41639](https://github.com/ClickHouse/ClickHouse/issues/41639): Fix possible `pipeline stuck` exception for queries with `OFFSET`. The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). [#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41899](https://github.com/ClickHouse/ClickHouse/issues/41899): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### Bug Fix (user-visible misbehaviour in official stable or prestable release)
* Backported in [#41321](https://github.com/ClickHouse/ClickHouse/issues/41321): Fix bug in function `if` when resulting column type differs with resulting data type that led to logical errors like `Logical error: 'Bad cast from type DB::ColumnVector<int> to DB::ColumnVector<long>'.`. Closes [#35367](https://github.com/ClickHouse/ClickHouse/issues/35367). [#35476](https://github.com/ClickHouse/ClickHouse/pull/35476) ([Kruglov Pavel](https://github.com/Avogar)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* use ROBOT_CLICKHOUSE_COMMIT_TOKEN for create-pull-request [#40067](https://github.com/ClickHouse/ClickHouse/pull/40067) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* use input token instead of env var [#40421](https://github.com/ClickHouse/ClickHouse/pull/40421) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* DNSResolver remove AI_V4MAPPED, AI_ALL hints [#40502](https://github.com/ClickHouse/ClickHouse/pull/40502) ([Maksim Kita](https://github.com/kitaisreal)).
* Migrate artifactory [#40831](https://github.com/ClickHouse/ClickHouse/pull/40831) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Docker server version [#41256](https://github.com/ClickHouse/ClickHouse/pull/41256) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Download ccache from release PRs for backports [#41328](https://github.com/ClickHouse/ClickHouse/pull/41328) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Remove `-WithTerminatingZero` methods [#41431](https://github.com/ClickHouse/ClickHouse/pull/41431) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove trash from Field [#41457](https://github.com/ClickHouse/ClickHouse/pull/41457) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Mask some information in logs. [#41474](https://github.com/ClickHouse/ClickHouse/pull/41474) ([Vitaly Baranov](https://github.com/vitlibar)).
* Build latest tags ONLY from master branch [#41567](https://github.com/ClickHouse/ClickHouse/pull/41567) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix the typo preventing building latest images [#41769](https://github.com/ClickHouse/ClickHouse/pull/41769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,52 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.7.6.74-stable (c00ffb3c11a) FIXME as compared to v22.7.5.13-stable (6f48d2d1f59)
#### New Feature
* Backported in [#40869](https://github.com/ClickHouse/ClickHouse/issues/40869): Add setting to disable limit on kafka_num_consumers. Closes [#40331](https://github.com/ClickHouse/ClickHouse/issues/40331). [#40670](https://github.com/ClickHouse/ClickHouse/pull/40670) ([Kruglov Pavel](https://github.com/Avogar)).
#### Bug Fix
* Backported in [#41228](https://github.com/ClickHouse/ClickHouse/issues/41228): Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Build/Testing/Packaging Improvement
* Backported in [#41559](https://github.com/ClickHouse/ClickHouse/issues/41559): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#41283](https://github.com/ClickHouse/ClickHouse/issues/41283): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)).
* Backported in [#40865](https://github.com/ClickHouse/ClickHouse/issues/40865): Fix crash while parsing values of type `Object` that contain arrays of variadic dimension. [#40483](https://github.com/ClickHouse/ClickHouse/pull/40483) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#40804](https://github.com/ClickHouse/ClickHouse/issues/40804): During insertion of a new query to the `ProcessList` allocations happen. If we reach the memory limit during these allocations we can not use `OvercommitTracker`, because `ProcessList::mutex` is already acquired. Fixes [#40611](https://github.com/ClickHouse/ClickHouse/issues/40611). [#40677](https://github.com/ClickHouse/ClickHouse/pull/40677) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#40810](https://github.com/ClickHouse/ClickHouse/issues/40810): In [#40595](https://github.com/ClickHouse/ClickHouse/issues/40595) it was reported that the `host_regexp` functionality was not working properly with a name to address resolution in `/etc/hosts`. It's fixed. [#40769](https://github.com/ClickHouse/ClickHouse/pull/40769) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#41134](https://github.com/ClickHouse/ClickHouse/issues/41134): Fix access rights for `DESCRIBE TABLE url()` and some other `DESCRIBE TABLE <table_function>()`. [#40975](https://github.com/ClickHouse/ClickHouse/pull/40975) ([Vitaly Baranov](https://github.com/vitlibar)).
* Backported in [#41617](https://github.com/ClickHouse/ClickHouse/issues/41617): Fix possible segfaults, heap-use-after-free and memory leaks in aggregate function combinators. Closes [#40848](https://github.com/ClickHouse/ClickHouse/issues/40848). [#41083](https://github.com/ClickHouse/ClickHouse/pull/41083) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#41666](https://github.com/ClickHouse/ClickHouse/issues/41666): Queries with `OFFSET` clause in subquery and `WHERE` clause in outer query might return incorrect result, it's fixed. Fixes [#40416](https://github.com/ClickHouse/ClickHouse/issues/40416). [#41280](https://github.com/ClickHouse/ClickHouse/pull/41280) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#41361](https://github.com/ClickHouse/ClickHouse/issues/41361): Fix incorrect logical error `Expected relative path` in disk object storage. Related to [#41246](https://github.com/ClickHouse/ClickHouse/issues/41246). [#41297](https://github.com/ClickHouse/ClickHouse/pull/41297) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#41358](https://github.com/ClickHouse/ClickHouse/issues/41358): Add column type check before UUID insertion in MsgPack format. [#41309](https://github.com/ClickHouse/ClickHouse/pull/41309) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#41808](https://github.com/ClickHouse/ClickHouse/issues/41808): Malicious data in Native format might cause a crash. [#41441](https://github.com/ClickHouse/ClickHouse/pull/41441) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41805](https://github.com/ClickHouse/ClickHouse/issues/41805): The aggregate function `categorialInformationValue` had incorrectly defined properties, which might cause a null pointer dereference at runtime. This closes [#41443](https://github.com/ClickHouse/ClickHouse/issues/41443). [#41449](https://github.com/ClickHouse/ClickHouse/pull/41449) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41505](https://github.com/ClickHouse/ClickHouse/issues/41505): Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41644](https://github.com/ClickHouse/ClickHouse/issues/41644): Queries with `ORDER BY` and `1500 <= LIMIT <= max_block_size` could return incorrect result with missing rows from top. Fixes [#41182](https://github.com/ClickHouse/ClickHouse/issues/41182). [#41576](https://github.com/ClickHouse/ClickHouse/pull/41576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41641](https://github.com/ClickHouse/ClickHouse/issues/41641): Fix possible `pipeline stuck` exception for queries with `OFFSET`. The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). [#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41900](https://github.com/ClickHouse/ClickHouse/issues/41900): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* use ROBOT_CLICKHOUSE_COMMIT_TOKEN for create-pull-request [#40067](https://github.com/ClickHouse/ClickHouse/pull/40067) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* use input token instead of env var [#40421](https://github.com/ClickHouse/ClickHouse/pull/40421) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Migrate artifactory [#40831](https://github.com/ClickHouse/ClickHouse/pull/40831) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* CaresPTRResolver small safety improvement [#40890](https://github.com/ClickHouse/ClickHouse/pull/40890) ([Arthur Passos](https://github.com/arthurpassos)).
* Docker server version [#41256](https://github.com/ClickHouse/ClickHouse/pull/41256) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Download ccache from release PRs for backports [#41328](https://github.com/ClickHouse/ClickHouse/pull/41328) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Increase open files limit [#41345](https://github.com/ClickHouse/ClickHouse/pull/41345) ([Eugene Konkov](https://github.com/ekonkov)).
* Remove `-WithTerminatingZero` methods [#41431](https://github.com/ClickHouse/ClickHouse/pull/41431) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove trash from Field [#41457](https://github.com/ClickHouse/ClickHouse/pull/41457) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Mask some information in logs. [#41474](https://github.com/ClickHouse/ClickHouse/pull/41474) ([Vitaly Baranov](https://github.com/vitlibar)).
* Build latest tags ONLY from master branch [#41567](https://github.com/ClickHouse/ClickHouse/pull/41567) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix the typo preventing building latest images [#41769](https://github.com/ClickHouse/ClickHouse/pull/41769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Revert "ColumnVector: optimize UInt8 index with AVX512VBMI ([#41247](https://github.com/ClickHouse/ClickHouse/issues/41247))" [#41797](https://github.com/ClickHouse/ClickHouse/pull/41797) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -0,0 +1,56 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.8.6.71-lts (7bf38a43e30) FIXME as compared to v22.8.5.29-lts (74ffb843807)
#### Improvement
* Backported in [#41507](https://github.com/ClickHouse/ClickHouse/issues/41507): Fix incompatibility of cache after switching setting `do_no_evict_index_and_mark_files` from 1 to 0, 0 to 1. [#41330](https://github.com/ClickHouse/ClickHouse/pull/41330) ([Kseniia Sumarokova](https://github.com/kssenii)).
#### Bug Fix
* Backported in [#41229](https://github.com/ClickHouse/ClickHouse/issues/41229): Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Build/Testing/Packaging Improvement
* Backported in [#41560](https://github.com/ClickHouse/ClickHouse/issues/41560): Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#41284](https://github.com/ClickHouse/ClickHouse/issues/41284): Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)).
* Backported in [#41837](https://github.com/ClickHouse/ClickHouse/issues/41837): Fix vertical merge of parts with lightweight deleted rows. [#40559](https://github.com/ClickHouse/ClickHouse/pull/40559) ([Alexander Gololobov](https://github.com/davenger)).
* Backported in [#41618](https://github.com/ClickHouse/ClickHouse/issues/41618): Fix possible segfaults, use-heap-after-free and memory leak in aggregate function combinators. Closes [#40848](https://github.com/ClickHouse/ClickHouse/issues/40848). [#41083](https://github.com/ClickHouse/ClickHouse/pull/41083) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#41667](https://github.com/ClickHouse/ClickHouse/issues/41667): Queries with `OFFSET` clause in subquery and `WHERE` clause in outer query might return incorrect result, it's fixed. Fixes [#40416](https://github.com/ClickHouse/ClickHouse/issues/40416). [#41280](https://github.com/ClickHouse/ClickHouse/pull/41280) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#41685](https://github.com/ClickHouse/ClickHouse/issues/41685): Fix possible wrong query result with `query_plan_optimize_primary_key` enabled. Fixes [#40599](https://github.com/ClickHouse/ClickHouse/issues/40599). [#41281](https://github.com/ClickHouse/ClickHouse/pull/41281) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41362](https://github.com/ClickHouse/ClickHouse/issues/41362): Fix incorrect logical error `Expected relative path` in disk object storage. Related to [#41246](https://github.com/ClickHouse/ClickHouse/issues/41246). [#41297](https://github.com/ClickHouse/ClickHouse/pull/41297) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#41359](https://github.com/ClickHouse/ClickHouse/issues/41359): Add column type check before UUID insertion in MsgPack format. [#41309](https://github.com/ClickHouse/ClickHouse/pull/41309) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#41596](https://github.com/ClickHouse/ClickHouse/issues/41596): Fix possible deadlock with async_socket_for_remote/use_hedged_requests and parallel KILL. [#41343](https://github.com/ClickHouse/ClickHouse/pull/41343) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#41521](https://github.com/ClickHouse/ClickHouse/issues/41521): Since 22.8, the `ON CLUSTER` clause is ignored if the database is `Replicated` and the cluster name and database name are the same. Because of this, `DROP PARTITION ON CLUSTER` worked in an unexpected way with `Replicated`. It's fixed: now the `ON CLUSTER` clause is ignored only for queries that are replicated on the database level. Fixes [#41299](https://github.com/ClickHouse/ClickHouse/issues/41299). [#41390](https://github.com/ClickHouse/ClickHouse/pull/41390) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#41480](https://github.com/ClickHouse/ClickHouse/issues/41480): Malicious data in Native format might cause a crash. [#41441](https://github.com/ClickHouse/ClickHouse/pull/41441) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41806](https://github.com/ClickHouse/ClickHouse/issues/41806): The aggregate function `categorialInformationValue` was having incorrectly defined properties, which might cause a null pointer dereferencing at runtime. This closes [#41443](https://github.com/ClickHouse/ClickHouse/issues/41443). [#41449](https://github.com/ClickHouse/ClickHouse/pull/41449) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41506](https://github.com/ClickHouse/ClickHouse/issues/41506): Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#41581](https://github.com/ClickHouse/ClickHouse/issues/41581): Fix possible hung/deadlock on query cancellation (`KILL QUERY` or server shutdown). [#41467](https://github.com/ClickHouse/ClickHouse/pull/41467) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#41645](https://github.com/ClickHouse/ClickHouse/issues/41645): Queries with `ORDER BY` and `1500 <= LIMIT <= max_block_size` could return incorrect result with missing rows from top. Fixes [#41182](https://github.com/ClickHouse/ClickHouse/issues/41182). [#41576](https://github.com/ClickHouse/ClickHouse/pull/41576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41642](https://github.com/ClickHouse/ClickHouse/issues/41642): Fix possible `pipeline stuck` exception for queries with `OFFSET`. The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). [#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41901](https://github.com/ClickHouse/ClickHouse/issues/41901): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41836](https://github.com/ClickHouse/ClickHouse/issues/41836): Don't allow to create or alter merge tree tables with virtual column name _row_exists, which is reserved for lightweight delete. Fixed [#41716](https://github.com/ClickHouse/ClickHouse/issues/41716). [#41763](https://github.com/ClickHouse/ClickHouse/pull/41763) ([Jianmei Zhang](https://github.com/zhangjmruc)).
* Backported in [#41890](https://github.com/ClickHouse/ClickHouse/issues/41890): Old versions of the Replicated database don't have a special marker in [Zoo]Keeper. We need to check only whether the node contains some obscure data instead of a special marker. [#41875](https://github.com/ClickHouse/ClickHouse/pull/41875) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix stress test after [#40420](https://github.com/ClickHouse/ClickHouse/issues/40420) [#40608](https://github.com/ClickHouse/ClickHouse/pull/40608) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Change default in one cache setting [#41139](https://github.com/ClickHouse/ClickHouse/pull/41139) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Docker server version [#41256](https://github.com/ClickHouse/ClickHouse/pull/41256) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix download_binary, use proper version and commit [#41260](https://github.com/ClickHouse/ClickHouse/pull/41260) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Download ccache from release PRs for backports [#41328](https://github.com/ClickHouse/ClickHouse/pull/41328) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Increase open files limit [#41345](https://github.com/ClickHouse/ClickHouse/pull/41345) ([Eugene Konkov](https://github.com/ekonkov)).
* Remove `-WithTerminatingZero` methods [#41431](https://github.com/ClickHouse/ClickHouse/pull/41431) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove trash from Field [#41457](https://github.com/ClickHouse/ClickHouse/pull/41457) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Mask some information in logs. [#41474](https://github.com/ClickHouse/ClickHouse/pull/41474) ([Vitaly Baranov](https://github.com/vitlibar)).
* Build latest tags ONLY from master branch [#41567](https://github.com/ClickHouse/ClickHouse/pull/41567) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix debug build after [#41507](https://github.com/ClickHouse/ClickHouse/issues/41507) [#41597](https://github.com/ClickHouse/ClickHouse/pull/41597) ([Dmitry Novik](https://github.com/novikd)).
* Revert of "Revert the revert of "ColumnVector: optimize filter with AVX512 VBMI2 compress store" [#40033](https://github.com/ClickHouse/ClickHouse/issues/40033)" [#41752](https://github.com/ClickHouse/ClickHouse/pull/41752) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix the typo preventing building latest images [#41769](https://github.com/ClickHouse/ClickHouse/pull/41769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Revert "ColumnVector: optimize UInt8 index with AVX512VBMI ([#41247](https://github.com/ClickHouse/ClickHouse/issues/41247))" [#41797](https://github.com/ClickHouse/ClickHouse/pull/41797) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -0,0 +1,20 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.9.2.7-stable (362e2cefcef) FIXME as compared to v22.9.1.2603-stable (3030d4c7ff0)
#### Improvement
* Backported in [#41709](https://github.com/ClickHouse/ClickHouse/issues/41709): Check file path for path traversal attacks in errors logger for input formats. [#41694](https://github.com/ClickHouse/ClickHouse/pull/41694) ([Kruglov Pavel](https://github.com/Avogar)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#41696](https://github.com/ClickHouse/ClickHouse/issues/41696): Fixes issue when docker run will fail if "https_port" is not present in config. [#41693](https://github.com/ClickHouse/ClickHouse/pull/41693) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix typos in JSON formats after [#40910](https://github.com/ClickHouse/ClickHouse/issues/40910) [#41614](https://github.com/ClickHouse/ClickHouse/pull/41614) ([Kruglov Pavel](https://github.com/Avogar)).

View File

@ -0,0 +1,23 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.9.3.18-stable (0cb4b15d2fa) FIXME as compared to v22.9.2.7-stable (362e2cefcef)
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#41902](https://github.com/ClickHouse/ClickHouse/issues/41902): Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#41863](https://github.com/ClickHouse/ClickHouse/issues/41863): 22.9 might fail to startup `ReplicatedMergeTree` table if that table was created by 20.3 or older version and was never altered, it's fixed. Fixes [#41742](https://github.com/ClickHouse/ClickHouse/issues/41742). [#41796](https://github.com/ClickHouse/ClickHouse/pull/41796) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#41892](https://github.com/ClickHouse/ClickHouse/issues/41892): Fix compact parts with compressed marks setting. Fixes [#41783](https://github.com/ClickHouse/ClickHouse/issues/41783) and [#41746](https://github.com/ClickHouse/ClickHouse/issues/41746). [#41823](https://github.com/ClickHouse/ClickHouse/pull/41823) ([alesapin](https://github.com/alesapin)).
* Backported in [#41891](https://github.com/ClickHouse/ClickHouse/issues/41891): Old versions of the Replicated database don't have a special marker in [Zoo]Keeper. We need to check only whether the node contains some obscure data instead of a special marker. [#41875](https://github.com/ClickHouse/ClickHouse/pull/41875) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Revert of "Revert the revert of "ColumnVector: optimize filter with AVX512 VBMI2 compress store" [#40033](https://github.com/ClickHouse/ClickHouse/issues/40033)" [#41752](https://github.com/ClickHouse/ClickHouse/pull/41752) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update 02354_annoy.sql [#41767](https://github.com/ClickHouse/ClickHouse/pull/41767) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix the typo preventing building latest images [#41769](https://github.com/ClickHouse/ClickHouse/pull/41769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Revert "ColumnVector: optimize UInt8 index with AVX512VBMI ([#41247](https://github.com/ClickHouse/ClickHouse/issues/41247))" [#41797](https://github.com/ClickHouse/ClickHouse/pull/41797) ([Alexey Milovidov](https://github.com/alexey-milovidov)).

View File

@ -5,11 +5,49 @@ slug: /en/development/integrating_rust_libraries
Rust library integration will be described based on BLAKE3 hash-function integration.
The first step is forking a library and making necessary changes for Rust and C/C++ compatibility.
The first step of integration is to add the library to the /rust folder. To do this, you need to create an empty Rust project and include the required library in Cargo.toml. It is also necessary to configure the new library to compile as static by adding `crate-type = ["staticlib"]` to Cargo.toml.
After forking the library repository you need to change the target settings in the Cargo.toml file. First, you need to switch the build to a static library. Second, you need to add the cbindgen crate to the crate list; we will use it later to generate a C header automatically.
Next, you need to link the library to CMake using the Corrosion library. The first step is to add the library folder to the CMakeLists.txt inside the /rust folder. After that, you should add a CMakeLists.txt file to the library directory. In it, you need to call the Corrosion import function. These lines were used to import BLAKE3:
The next step is to create or edit the build.rs script for your library and enable cbindgen to generate the header during the library build. These lines were added to the BLAKE3 build script for the same purpose:
```
corrosion_import_crate(MANIFEST_PATH Cargo.toml NO_STD)
target_include_directories(_ch_rust_blake3 INTERFACE include)
add_library(ch_rust::blake3 ALIAS _ch_rust_blake3)
```
Thus, we will create a correct CMake target using Corrosion, and then rename it with a more convenient name. Note that the name `_ch_rust_blake3` comes from Cargo.toml, where it is used as project name (`name = "_ch_rust_blake3"`).
Since Rust data types are not compatible with C/C++ data types, we will use our empty library project to create shim methods for conversion of data received from C/C++, calling library methods, and inverse conversion for output data. For example, this method was written for BLAKE3:
```
#[no_mangle]
pub unsafe extern "C" fn blake3_apply_shim(
begin: *const c_char,
_size: u32,
out_char_data: *mut u8,
) -> *mut c_char {
if begin.is_null() {
let err_str = CString::new("input was a null pointer").unwrap();
return err_str.into_raw();
}
let mut hasher = blake3::Hasher::new();
let input_bytes = CStr::from_ptr(begin);
let input_res = input_bytes.to_bytes();
hasher.update(input_res);
let mut reader = hasher.finalize_xof();
reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN));
std::ptr::null_mut()
}
```
This method takes a C-compatible string, its size, and an output string pointer as input. Then it converts the C-compatible inputs into the types used by the actual library methods and calls them. After that, it converts the library methods' outputs back into a C-compatible type. In this particular case the library supported writing directly into the pointer via the method fill(), so no conversion was needed. The main advice here is to create fewer methods, so that you need fewer conversions on each method call and don't create much overhead.
It is worth noting that the `#[no_mangle]` attribute and `extern "C"` are mandatory for all such methods. Without them, it will not be possible to perform a correct C/C++-compatible compilation. Moreover, they are necessary for the next step of the integration.
After writing the code for the shim methods, we need to prepare the header file for the library. This can be done manually, or you can use the cbindgen library for auto-generation. In case of using cbindgen, you will need to write a build.rs build script and include cbindgen as a build-dependency.
An example of a build script that can auto-generate a header file:
```
let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
@ -27,39 +65,9 @@ The next step is creating or editing the build.rs script for your library - and
}
```
As you can see, the script sets the output directory and launches header generation.
The next step is to add CMake files into the library directory, so it can build with the other submodules. As you can see, the BLAKE3 main directory contains two CMake files: CMakeLists.txt and build_rust_lib.cmake. The second one is a function which calls cargo build and sets all the paths needed for the library build. You should copy it to your library; then you can adjust cargo flags and other settings for your library's needs.
When finished with CMake configuration, you should move on to create a C/C++ compatible API for your library. Let us see BLAKE3's method blake3_apply_shim:
```
#[no_mangle]
pub unsafe extern "C" fn blake3_apply_shim(
begin: *const c_char,
_size: u32,
out_char_data: *mut u8,
) -> *mut c_char {
if begin.is_null() {
let err_str = CString::new("input was a null pointer").unwrap();
return err_str.into_raw();
}
let mut hasher = Hasher::new();
let input_bytes = CStr::from_ptr(begin);
let input_res = input_bytes.to_bytes();
hasher.update(input_res);
let mut reader = hasher.finalize_xof();
reader.fill(std::slice::from_raw_parts_mut(out_char_data, OUT_LEN));
std::ptr::null_mut()
}
```
This method takes a C-compatible string, its size, and an output string pointer as input. Then it converts the C-compatible inputs into the types used by the actual library methods and calls them. After that, it converts the library methods' outputs back into a C-compatible type. In this particular case the library supported writing directly into the pointer via the method fill(), so no conversion was needed. The main advice here is to create fewer methods, so that you need fewer conversions on each method call and don't create much overhead.
Also, you should use the #[no_mangle] attribute and `extern "C"` for every C-compatible function. Without them the library can compile incorrectly, and cbindgen won't launch header auto-generation.
After all these steps you can test your library in a small project to find all problems with compatibility or header generation. If any problems occur during header generation, you can try to configure it with cbindgen.toml file (you can find an example of it in BLAKE3 directory or a template here: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml)). If everything works correctly, you can finally integrate its methods into ClickHouse.
After all these steps you can test your library in a small project to find all problems with compatibility or header generation. If any problems occur during header generation, you can try to configure it with cbindgen.toml file (you can find a template here: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml)).
In addition, some problems with integration are worth noting here:
1) Some architectures may require special cargo flags or build.rs configurations, so you may want to test cross-compilation for different platforms first.
2) MemorySanitizer can cause false-positive reports, as it is unable to see whether some variables in Rust are initialized or not. This was solved by writing a method with a more explicit definition for some variables, although this implementation is slower and is used only to fix MemorySanitizer builds.
It is worth noting the problem that occurred when integrating BLAKE3:
MemorySanitizer can cause false-positive reports, as it is unable to see whether some variables in Rust are initialized or not. This was solved by writing a method with a more explicit definition for some variables, although this implementation is slower and is used only to fix MemorySanitizer builds.

View File

@ -0,0 +1,127 @@
# Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex}
The main task of these indexes is to quickly find the nearest neighbors for multidimensional data. An example of such a problem is finding similar pictures (or texts) for a given picture (text). That problem can be reduced to finding the nearest [embeddings](https://cloud.google.com/architecture/overview-extracting-and-serving-feature-embeddings-for-machine-learning). Embeddings can be created from data using [UDF](../../../sql-reference/functions/index.md#executable-user-defined-functions).
The next query finds the closest neighbors in N-dimensional space using the L2 (Euclidean) distance:
``` sql
SELECT *
FROM table_name
WHERE L2Distance(Column, Point) < MaxDistance
LIMIT N
```
But it will take some time to execute because of the costly calculation of the distance between `Point` and all other vectors. This is where ANN indexes can help. They store a compact approximation of the search space (e.g. using clustering, search trees, etc.) and are able to compute approximate neighbors quickly.
## Indexes Structure
Approximate Nearest Neighbor Search Indexes (`ANNIndexes`) are similar to skip indexes. They are built over granules and determine which of them can be skipped. Compared to skip indexes, ANN indexes use their results not only to skip some groups of granules, but also to select particular granules from a set of granules.
`ANNIndexes` are designed to speed up two types of queries:
- ###### Type 1: Where
``` sql
SELECT *
FROM table_name
WHERE DistanceFunction(Column, Point) < MaxDistance
LIMIT N
```
- ###### Type 2: Order by
``` sql
SELECT *
FROM table_name [WHERE ...]
ORDER BY DistanceFunction(Column, Point)
LIMIT N
```
In these queries, `DistanceFunction` is selected from [distance functions](../../../sql-reference/functions/distance-functions). `Point` is a known vector (something like `(0.1, 0.1, ... )`). To avoid writing large vectors, use [client parameters](../../../interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters). `MaxDistance` is a float value that bounds the neighbourhood.
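For illustration, a hedged sketch of a Type 2 query where the vector is passed as a query parameter instead of being written inline (the parameter name `point` is hypothetical; a client would supply it, e.g. via `--param_point`):
```sql
SELECT *
FROM table_name
ORDER BY L2Distance(Column, {point: Tuple(Float32, Float32, Float32)})
LIMIT 10
```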
!!! note "Note"
An ANN index can't speed up a query that combines both types (`WHERE` + `ORDER BY`); only one of them can be used. All queries must have a `LIMIT`, because the algorithms are used to find nearest neighbors and need a specific number of them.
!!! note "Note"
Indexes are applied only to queries with a `LIMIT` smaller than the `max_limit_for_ann_queries` setting. This helps to avoid memory overflows in queries with a large limit. The `max_limit_for_ann_queries` setting can be changed if you know you can provide enough memory. The default value is `1000000`.
Both types of queries are handled the same way. The indexes get `n` neighbors (where `n` is taken from the `LIMIT` clause) and work with them. In an `ORDER BY` query they remember the numbers of all granule parts that contain at least one of the neighbors. In a `WHERE` query they remember only those parts that satisfy the conditions.
## Create table with ANNIndex
This feature is disabled by default. To enable it, set `allow_experimental_annoy_index` to 1. Also, this feature is disabled on ARM, due to likely problems with the algorithm.
```sql
CREATE TABLE t
(
`id` Int64,
`number` Tuple(Float32, Float32, Float32),
INDEX x number TYPE annoy GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```
```sql
CREATE TABLE t
(
`id` Int64,
`number` Array(Float32),
INDEX x number TYPE annoy GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```
With a greater `GRANULARITY`, indexes remember the data structure better. `GRANULARITY` indicates how many granules are used to construct the index. The more data is provided to the index, the more of it one index can handle, and the higher the chance that, with the right hyperparameters, the index remembers the data structure better. But some indexes can't be built if they don't have enough data, so such a granule will always participate in the query. For more information, see the description of indexes.
As the indexes are built only during insertions into the table, `INSERT` and `OPTIMIZE` queries are slower than for an ordinary table. At this stage indexes remember all the information about the given data. ANN indexes should be used if you have immutable or rarely changed data and many read requests.
You can create your table with an index which uses a certain algorithm. Currently, only indexes based on the following algorithms are supported:
# Index list
- [Annoy](../../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy)
# Annoy {#annoy}
Implementation of the algorithm was taken from [this repository](https://github.com/spotify/annoy).
Short description of the algorithm:
The algorithm recursively divides all of the space in half by random linear surfaces (lines in 2D, planes in 3D, etc.). Thus it builds a tree of polyhedrons and the points that they contain. Repeating the operation several times for greater accuracy, it creates a forest.
To find the K nearest neighbours it goes down through the trees and fills a buffer of the closest points using a priority queue of polyhedrons. Next, it sorts the buffer and returns the nearest K points.
__Examples__:
```sql
CREATE TABLE t
(
id Int64,
number Tuple(Float32, Float32, Float32),
INDEX x number TYPE annoy(T) GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```
```sql
CREATE TABLE t
(
id Int64,
number Array(Float32),
INDEX x number TYPE annoy(T) GRANULARITY N
)
ENGINE = MergeTree
ORDER BY id;
```
!!! note "Note"
A table with an array field will work faster, but all arrays **must** have the same length. Use [CONSTRAINT](../../../sql-reference/statements/create/table.md#constraints) to avoid errors. For example, `CONSTRAINT constraint_name_1 CHECK length(number) = 256` (see the sketch below).
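A minimal sketch of such a constraint combined with an Annoy index (the array length `3`, the tree count `100`, and the granularity `1000` are illustrative only):
```sql
CREATE TABLE t_ann
(
    id Int64,
    number Array(Float32),
    INDEX x number TYPE annoy(100) GRANULARITY 1000,
    -- rejects inserts whose vectors have the wrong dimensionality
    CONSTRAINT check_len CHECK length(number) = 3
)
ENGINE = MergeTree
ORDER BY id;
```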
The parameter `T` is the number of trees which the algorithm will create. The bigger it is, the slower it works (approximately linearly, in both `CREATE` and `SELECT` requests), but the better accuracy you get (adjusted for randomness).
Annoy supports only `L2Distance`.
In the `SELECT` settings (`ann_index_select_query_params`) you can specify the size of the internal buffer (more details in the description above or in the [original repository](https://github.com/spotify/annoy)). During the query it will inspect up to `search_k` nodes, which defaults to `n_trees * n` if not provided. `search_k` gives you a run-time tradeoff between better accuracy and speed.
__Example__:
``` sql
SELECT *
FROM table_name [WHERE ...]
ORDER BY L2Distance(Column, Point)
LIMIT N
SETTINGS ann_index_select_query_params='search_k=100'
```

View File

@ -481,6 +481,10 @@ For example:
- `NOT startsWith(s, 'test')`
:::
## Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex}
In addition to skip indices, there are also [Approximate Nearest Neighbor Search Indexes](../../../engines/table-engines/mergetree-family/annindexes.md).
## Projections {#projections}
Projections are like [materialized views](../../../sql-reference/statements/create/view.md#materialized) but defined in part-level. It provides consistency guarantees along with automatic usage in queries.

File diff suppressed because one or more lines are too long

View File

@ -65,7 +65,9 @@ CREATE TABLE criteo
icat24 UInt32,
icat25 UInt32,
icat26 UInt32
) ENGINE = MergeTree(date, intHash32(icat1), (date, intHash32(icat1)), 8192)
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(date)
ORDER BY (date, icat1)
```
Transform data from the raw log and put it in the second table:

View File

@ -29,12 +29,12 @@ The supported formats are:
| [SQLInsert](#sqlinsert) | ✗ | ✔ |
| [Values](#data-format-values) | ✔ | ✔ |
| [Vertical](#vertical) | ✗ | ✔ |
| [JSON](#json) | ✗ | ✔ |
| [JSON](#json) | ✔ | ✔ |
| [JSONAsString](#jsonasstring) | ✔ | ✗ |
| [JSONStrings](#jsonstrings) | ✗ | ✔ |
| [JSONStrings](#jsonstrings) | ✔ | ✔ |
| [JSONColumns](#jsoncolumns) | ✔ | ✔ |
| [JSONColumnsWithMetadata](#jsoncolumnswithmetadata) | ✗ | ✔ |
| [JSONCompact](#jsoncompact) | ✗ | ✔ |
| [JSONColumnsWithMetadata](#jsoncolumnswithmetadata) | ✔ | ✔ |
| [JSONCompact](#jsoncompact) | ✔ | ✔ |
| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ |
| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ |
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
@ -47,6 +47,7 @@ The supported formats are:
| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ |
| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ |
| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ |
| [JSONObjectEachRow](#jsonobjecteachrow) | ✔ | ✔ |
| [TSKV](#tskv) | ✔ | ✔ |
| [Pretty](#pretty) | ✗ | ✔ |
| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ |
@ -608,8 +609,6 @@ If the query contains GROUP BY, rows_before_limit_at_least is the exact number o
`extremes` Extreme values (when extremes are set to 1).
This format is only appropriate for outputting a query result, but not for parsing (retrieving data to insert in a table).
ClickHouse supports [NULL](../sql-reference/syntax.md), which is displayed as `null` in the JSON output. To enable `+nan`, `-nan`, `+inf`, `-inf` values in output, set the [output_format_json_quote_denormals](../operations/settings/settings.md#output_format_json_quote_denormals) to 1.
**See Also**
@ -617,6 +616,9 @@ ClickHouse supports [NULL](../sql-reference/syntax.md), which is displayed as `n
- [JSONEachRow](#jsoneachrow) format
- [output_format_json_array_of_rows](../operations/settings/settings.md#output_format_json_array_of_rows) setting
For JSON input format, if setting [input_format_json_validate_types_from_metadata](../operations/settings/settings.md#input_format_json_validate_types_from_metadata) is set to 1,
the types from metadata in input data will be compared with the types of the corresponding columns from the table.
## JSONStrings {#jsonstrings}
Differs from JSON only in that data fields are output in strings, not in typed JSON values.
@ -693,8 +695,8 @@ Columns that are not present in the block will be filled with default values (yo
## JSONColumnsWithMetadata {#jsoncolumnsmonoblock}
Differs from JSONColumns output format in that it also outputs some metadata and statistics (similar to JSON output format).
This format buffers all data in memory and then outputs it as a single block, so it can lead to high memory consumption.
Differs from JSONColumns format in that it also contains some metadata and statistics (similar to JSON format).
The output format buffers all data in memory and then outputs it as a single block, so it can lead to high memory consumption.
Example:
```json
@ -736,6 +738,9 @@ Example:
}
```
For JSONColumnsWithMetadata input format, if setting [input_format_json_validate_types_from_metadata](../operations/settings/settings.md#input_format_json_validate_types_from_metadata) is set to 1,
the types from metadata in input data will be compared with the types of the corresponding columns from the table.
## JSONAsString {#jsonasstring}
In this format, a single JSON object is interpreted as a single value. If the input has several JSON objects (comma separated), they are interpreted as separate rows. If the input data is enclosed in square brackets, it is interpreted as an array of JSONs.
@ -1001,6 +1006,21 @@ the types from input data will be compared with the types of the corresponding c
[44, "hello", [0,1,2,3]]
```
## JSONObjectEachRow {#jsonobjecteachrow}
In this format, all data is represented as a single JSON object; each row is represented as a separate field of this object, similar to the JSONEachRow format.
Example:
```json
{
"row_1": {"num": 42, "str": "hello", "arr": [0,1]},
"row_2": {"num": 43, "str": "hello", "arr": [0,1,2]},
"row_3": {"num": 44, "str": "hello", "arr": [0,1,2,3]}
}
```
### Inserting Data {#json-inserting-data}
``` sql
@ -1124,11 +1144,15 @@ SELECT * FROM json_each_row_nested
- [input_format_import_nested_json](../operations/settings/settings.md#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`.
- [input_format_json_read_bools_as_numbers](../operations/settings/settings.md#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`.
- [input_format_json_read_numbers_as_strings](../operations/settings/settings.md#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`.
- [output_format_json_quote_64bit_integers](../operations/settings/settings.md#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
- [output_format_json_quote_64bit_floats](../operations/settings/settings.md#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
- [output_format_json_quote_denormals](../operations/settings/settings.md#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.
- [output_format_json_quote_decimals](../operations/settings/settings.md#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`.
- [output_format_json_escape_forward_slashes](../operations/settings/settings.md#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`.
- [output_format_json_named_tuples_as_objects](../operations/settings/settings.md#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects (see the sketch after this list). Default value - `false`.
- [output_format_json_array_of_rows](../operations/settings/settings.md#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`.
- [output_format_json_validate_utf8](../operations/settings/settings.md#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`.
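As a hedged example of one of these settings in action (the named-tuple cast is for illustration only):
```sql
SELECT CAST((1, 'a'), 'Tuple(n UInt32, s String)') AS t
SETTINGS output_format_json_named_tuples_as_objects = 1
FORMAT JSONEachRow;
-- {"t":{"n":1,"s":"a"}}
```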
## Native {#native}

View File

@ -84,8 +84,8 @@ In the following example a table is created and loaded with data from a CSV file
``` bash
./clickhouse-grpc-client.py -q "CREATE TABLE grpc_example_table (id UInt32, text String) ENGINE = MergeTree() ORDER BY id;"
echo "0,Input data for" > a.txt ; echo "1,gRPC protocol example" >> a.txt
cat a.txt | ./clickhouse-grpc-client.py -q "INSERT INTO grpc_example_table FORMAT CSV"
echo -e "0,Input data for\n1,gRPC protocol example" > a.csv
cat a.csv | ./clickhouse-grpc-client.py -q "INSERT INTO grpc_example_table FORMAT CSV"
./clickhouse-grpc-client.py --format PrettyCompact -q "SELECT * FROM grpc_example_table;"
```

View File

@ -340,7 +340,7 @@ ClickHouse supports specific queries through the HTTP interface. For example, yo
$ echo '(4),(5),(6)' | curl 'http://localhost:8123/?query=INSERT%20INTO%20t%20VALUES' --data-binary @-
```
ClickHouse also supports Predefined HTTP Interface which can help you more easily integrate with third-party tools like [Prometheus exporter](https://github.com/percona-lab/clickhouse_exporter).
ClickHouse also supports Predefined HTTP Interface which can help you more easily integrate with third-party tools like [Prometheus exporter](https://github.com/ClickHouse/clickhouse_exporter).
Example:

View File

@ -1498,7 +1498,7 @@ If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored.
- `move_factor` is ignored.
- `keep_free_space_bytes` is ignored.
- `max_data_part_size_bytes` is ignored.
- You must have exactly one volume in that policy.
- Policy should have exactly one volume with local disks.
:::
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}

View File

@ -15,7 +15,7 @@ Possible values:
- Any positive integer.
Default value: 10.
Default value: 100.
Override example in `config.xml`:
@ -231,7 +231,7 @@ Possible values:
- Any positive integer.
Default value: 1800
Default value: 10800
## try_fetch_recompressed_part_timeout
@ -261,7 +261,7 @@ Possible values:
- Any positive integer.
Default value: 10
Default value: 100
## max_suspicious_broken_parts_bytes

View File

@ -3147,12 +3147,14 @@ Result:
## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}
Enables or disables returning results of type `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth).
Enables or disables returning results of type:
- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth).
- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md#timeslot).
Possible values:
- 0 — Functions return `Date` for all types of arguments.
- 1 — Functions return `Date32` for `Date32` or `DateTime64` arguments and `Date` otherwise.
- 0 — Functions return `Date` or `DateTime` for all types of arguments.
- 1 — Functions return `Date32` or `DateTime64` for `Date32` or `DateTime64` arguments and `Date` or `DateTime` otherwise.
Default value: `0`.
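A hedged sketch of the effect described in the first bullet above (the date is arbitrary; `toTypeName` just reveals the return type):
```sql
SET enable_extended_results_for_datetime_functions = 1;

-- a Date32 argument outside the Date range keeps its extended type
SELECT toStartOfMonth(toDate32('2299-12-15')) AS d, toTypeName(d);
-- 2299-12-01, Date32
```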
@ -3705,6 +3707,19 @@ Allow parsing bools as numbers in JSON input formats.
Enabled by default.
### input_format_json_read_numbers_as_strings {#input_format_json_read_numbers_as_strings}
Allow parsing numbers as strings in JSON input formats.
Disabled by default.
### input_format_json_validate_types_from_metadata {#input_format_json_validate_types_from_metadata}
For JSON/JSONCompact/JSONColumnsWithMetadata input formats, if this setting is set to 1,
the types from metadata in input data will be compared with the types of the corresponding columns from the table.
Enabled by default.
### output_format_json_quote_64bit_integers {#output_format_json_quote_64bit_integers}
Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md#json) format.
@ -3717,6 +3732,12 @@ Possible values:
Default value: 1.
### output_format_json_quote_64bit_floats {#output_format_json_quote_64bit_floats}
Controls quoting of 64-bit [floats](../../sql-reference/data-types/float.md) when they are output in JSON* formats.
Disabled by default.
### output_format_json_quote_denormals {#output_format_json_quote_denormals}
Enables `+nan`, `-nan`, `+inf`, `-inf` outputs in [JSON](../../interfaces/formats.md#json) output format.
@ -3816,6 +3837,12 @@ When `output_format_json_quote_denormals = 1`, the query returns:
}
```
### output_format_json_quote_decimals {#output_format_json_quote_decimals}
Controls quoting of decimals in JSON output formats.
Disabled by default.
### output_format_json_escape_forward_slashes {#output_format_json_escape_forward_slashes}
Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.
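For instance, a hedged sketch of what this setting changes:
```sql
SELECT '/path/to/file' AS s FORMAT JSONEachRow;
-- default (1): {"s":"\/path\/to\/file"}
-- with output_format_json_escape_forward_slashes = 0: {"s":"/path/to/file"}
```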
@ -3875,6 +3902,12 @@ Result:
{"number":"2"}
```
### output_format_json_validate_utf8 {#output_format_json_validate_utf8}
Controls validation of UTF-8 sequences in JSON output formats. It doesn't impact the formats JSON/JSONCompact/JSONColumnsWithMetadata, which always validate UTF-8.
Disabled by default.
## TSV format settings {#tsv-format-settings}
### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default}

View File

@ -131,7 +131,7 @@ Example of configuration for versions later or equal to 22.8:
<type>cache</type>
<disk>s3</disk>
<path>/s3_cache/</path>
<max_size>10000000</max_size>
<max_size>10Gi</max_size>
</cache>
</disks>
<policies>
@ -155,7 +155,7 @@ Example of configuration for versions earlier than 22.8:
<endpoint>...</endpoint>
... s3 configuration ...
<data_cache_enabled>1</data_cache_enabled>
<data_cache_max_size>10000000</data_cache_max_size>
<data_cache_max_size>10737418240</data_cache_max_size>
</s3>
</disks>
<policies>
@ -172,7 +172,7 @@ Cache **configuration settings**:
- `path` - path to the directory with cache. Default: None, this setting is obligatory.
- `max_size` - maximum size of the cache in bytes. When the limit is reached, cache files are evicted according to the cache eviction policy. Default: None, this setting is obligatory.
- `max_size` - maximum size of the cache in bytes or in readable format, e.g. `ki, Mi, Gi, etc`, example `10Gi` (such format works starting from `22.10` version). When the limit is reached, cache files are evicted according to the cache eviction policy. Default: None, this setting is obligatory.
- `cache_on_write_operations` - allows turning on the `write-through` cache (caching data on any write operations: `INSERT` queries, background merges). Default: `false`. The `write-through` cache can be disabled per query using the setting `enable_filesystem_cache_on_write_operations` (data is cached only if both the cache config setting and the corresponding query setting are enabled); see the sketch after this list.
@ -182,7 +182,7 @@ Cache **configuration settings**:
- `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `false`. This setting was added in version 22.8. If you used filesystem cache before this version, then it will not work on versions starting from 22.8 if this setting is set to `true`. If you want to use this setting, clear old cache created before version 22.8 before upgrading.
- `max_file_segment_size` - a maximum size of a single cache file. Default: `104857600` (100 Mb).
- `max_file_segment_size` - a maximum size of a single cache file in bytes or in readable format (`ki, Mi, Gi, etc`, example `10Gi`). Default: `104857600` (`100Mi`).
- `max_elements` - a limit for a number of cache files. Default: `1048576`.
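A hedged sketch of the per-query write-through toggle mentioned in `cache_on_write_operations` above (the single-column table `t` is hypothetical):
```sql
-- data written by this INSERT is cached only if the disk's
-- cache_on_write_operations flag is also enabled in the config
INSERT INTO t SETTINGS enable_filesystem_cache_on_write_operations = 1
VALUES (1);
```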
@ -213,9 +213,31 @@ Cache **commands**:
- `SYSTEM DROP FILESYSTEM CACHE (<path>) (ON CLUSTER)`
- `SHOW CACHES` -- show list of caches which were configured on the server.
- `SHOW FILESYSTEM CACHES` -- show list of filesystem caches which were configured on the server. (For versions <= `22.8` the command is named `SHOW CACHES`)
- `DESCRIBE CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW CACHES` command.
```sql
SHOW FILESYSTEM CACHES
```
Result:
``` text
┌─Caches────┐
│ s3_cache │
└───────────┘
```
- `DESCRIBE CACHE '<cache_name>'` - show cache configuration and some general statistics for a specific cache. Cache name can be taken from `SHOW CACHES` command. (For versions <= `22.8` the command is named `DESCRIBE CACHE`)
```sql
DESCRIBE CACHE 's3_cache'
```
``` text
┌────max_size─┬─max_elements─┬─max_file_segment_size─┬─cache_on_write_operations─┬─enable_cache_hits_threshold─┬─current_size─┬─current_elements─┬─path────────┬─do_not_evict_index_and_mark_files─┐
│ 10000000000 │ 1048576 │ 104857600 │ 1 │ 0 │ 3276 │ 54 │ /s3_cache/ │ 1 │
└─────────────┴──────────────┴───────────────────────┴───────────────────────────┴─────────────────────────────┴──────────────┴──────────────────┴─────────────┴───────────────────────────────────┘
```
Cache current metrics:

View File

@ -16,7 +16,7 @@ ClickHouse also supports:
## NULL Processing
During aggregation, all `NULL`s are skipped.
During aggregation, all `NULL`s are skipped. If the aggregation function has several arguments, it ignores any row in which one or more of the arguments are NULL (see the sketch at the end of this section).
**Examples:**
@ -58,4 +58,17 @@ SELECT groupArray(y) FROM t_null_big
`groupArray` does not include `NULL` in the resulting array.
You can use [COALESCE](../../sql-reference/functions/functions-for-nulls.md#coalesce) to change NULL into a value that makes sense in your use case. For example: `avg(COALESCE(column, 0))` will use the column value in the aggregation, or zero if it is NULL:
``` sql
SELECT
avg(y),
avg(coalesce(y, 0))
FROM t_null_big
```
``` text
┌─────────────avg(y)─┬─avg(coalesce(y, 0))─┐
│ 2.3333333333333335 │ 1.4 │
└────────────────────┴─────────────────────┘
```
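A hedged sketch of the multi-argument rule from the start of this section, using the self-contained `values` table function:
```sql
SELECT corr(x, y)
FROM values('x Nullable(Float64), y Nullable(Float64)',
            (1, 2), (2, NULL), (NULL, 4), (3, 6));
-- only (1, 2) and (3, 6) are aggregated; rows with any NULL argument are skipped
```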

View File

@ -5,7 +5,7 @@ sidebar_position: 103
# anyHeavy
Selects a frequently occurring value using the [heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf) algorithm. If there is a value that occurs more than in half the cases in each of the query's execution threads, this value is returned. Normally, the result is nondeterministic.
Selects a frequently occurring value using the [heavy hitters](https://doi.org/10.1145/762471.762473) algorithm. If there is a value that occurs more than in half the cases in each of the query's execution threads, this value is returned. Normally, the result is nondeterministic.
``` sql
anyHeavy(column)

View File

@ -7,7 +7,7 @@ sidebar_position: 108
Returns an array of the approximately most frequent values in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves).
Implements the [Filtered Space-Saving](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) algorithm for analyzing TopK, based on the reduce-and-combine algorithm from [Parallel Space Saving](https://arxiv.org/pdf/1401.0702.pdf).
Implements the [Filtered Space-Saving](https://doi.org/10.1016/j.ins.2010.08.024) algorithm for analyzing TopK, based on the reduce-and-combine algorithm from [Parallel Space Saving](https://doi.org/10.1016/j.ins.2015.09.003).
``` sql
topK(N)(column)

View File

@ -6,7 +6,7 @@ sidebar_label: Tuple(T1, T2, ...)
# Tuple(T1, T2, …)
A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types).
A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types). A tuple must contain at least one element.
Tuples are used for temporary column grouping. Columns can be grouped when an IN expression is used in a query, and for specifying certain formal parameters of lambda functions. For more information, see the sections [IN operators](../../sql-reference/operators/in.md) and [Higher order functions](../../sql-reference/functions/index.md#higher-order-functions).
@ -32,6 +32,34 @@ SELECT tuple(1,'a') AS x, toTypeName(x)
└─────────┴───────────────────────────┘
```
A tuple can contain a single element.
Example:
``` sql
SELECT tuple('a') AS x;
```
``` text
┌─x─────┐
│ ('a') │
└───────┘
```
There is syntactic sugar using parentheses `( tuple_element1, tuple_element2 )` to create a tuple of several elements without the tuple function.
Example:
``` sql
SELECT (1, 'a') AS x, (today(), rand(), 'someString') y, ('a') not_a_tuple;
```
``` text
┌─x───────┬─y──────────────────────────────────────┬─not_a_tuple─┐
│ (1,'a') │ ('2022-09-21',2006973416,'someString') │ a │
└─────────┴────────────────────────────────────────┴─────────────┘
```
## Working with Data Types
When creating a tuple on the fly, ClickHouse automatically detects the type of each argument as the minimum of the types which can store the argument value. If the argument is [NULL](../../sql-reference/syntax.md#null-literal), the type of the tuple element is [Nullable](../../sql-reference/data-types/nullable.md).
@ -76,4 +104,84 @@ Result:
└────────────────────┘
```
## Comparison operations with Tuple
Two tuples are compared sequentially, element by element, from left to right. If an element of the first tuple is greater than the corresponding element of the second tuple, then the first tuple is greater than the second; if the elements are equal, the next element is compared.
Example:
```sql
SELECT (1, 'z') > (1, 'a') c1, (2022, 01, 02) > (2023, 04, 02) c2, (1,2,3) = (3,2,1) c3;
```
``` text
┌─c1─┬─c2─┬─c3─┐
│ 1 │ 0 │ 0 │
└────┴────┴────┘
```
Real world examples:
```sql
CREATE TABLE test
(
`year` Int16,
`month` Int8,
`day` Int8
)
ENGINE = Memory AS
SELECT *
FROM values((2022, 12, 31), (2000, 1, 1));
SELECT * FROM test;
┌─year─┬─month─┬─day─┐
│ 2022 │ 12 │ 31 │
│ 2000 │ 1 │ 1 │
└──────┴───────┴─────┘
SELECT *
FROM test
WHERE (year, month, day) > (2010, 1, 1);
┌─year─┬─month─┬─day─┐
│ 2022 │ 12 │ 31 │
└──────┴───────┴─────┘
CREATE TABLE test
(
`key` Int64,
`duration` UInt32,
`value` Float64
)
ENGINE = Memory AS
SELECT *
FROM values((1, 42, 66.5), (1, 42, 70), (2, 1, 10), (2, 2, 0));
SELECT * FROM test;
┌─key─┬─duration─┬─value─┐
│ 1 │ 42 │ 66.5 │
│ 1 │ 42 │ 70 │
│ 2 │ 1 │ 10 │
│ 2 │ 2 │ 0 │
└─────┴──────────┴───────┘
-- Let's find a value for each key with the biggest duration; if durations are equal, select the biggest value
SELECT
key,
max(duration),
argMax(value, (duration, value))
FROM test
GROUP BY key
ORDER BY key ASC;
┌─key─┬─max(duration)─┬─argMax(value, tuple(duration, value))─┐
│ 1 │ 42 │ 70 │
│ 2 │ 2 │ 0 │
└─────┴───────────────┴───────────────────────────────────────┘
```
[Original article](https://clickhouse.com/docs/en/data_types/tuple/) <!--hide-->

View File

@ -268,15 +268,17 @@ Result:
```
:::note
The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) which is `0` by default.
The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) which is `0` by default.
Behavior for
* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOf*`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results. In case argument is out of normal range:
* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results. In case argument is out of normal range:
* If the argument is smaller than 1970, the result will be calculated from the argument `1970-01-01 (00:00:00)` instead.
* If the return type is `DateTime` and the argument is larger than `2106-02-07 08:28:15`, the result will be calculated from the argument `2106-02-07 08:28:15` instead.
* If the return type is `Date` and the argument is larger than `2149-06-06`, the result will be calculated from the argument `2149-06-06` instead.
* If `toLastDayOfMonth` is called with an argument greater then `2149-05-31`, the result will be calculated from the argument `2149-05-31` instead.
* `enable_extended_results_for_datetime_functions = 1`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and they return `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`.
* `enable_extended_results_for_datetime_functions = 1`:
* Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and they return `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`.
* Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime` if their argument is a `Date` or `DateTime`, and they return `DateTime64` if their argument is a `Date32` or `DateTime64`.
:::
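A hedged sketch of the second case above (the timestamp is arbitrary; the comment shows the type the note implies):
```sql
SELECT toTypeName(toStartOfHour(toDateTime64('2023-01-01 12:34:56', 3)))
SETTINGS enable_extended_results_for_datetime_functions = 1;
-- DateTime64(3); with the setting at 0 the same call returns DateTime
```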
## toStartOfYear

View File

@ -296,7 +296,14 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0
## javaHash
Calculates [JavaHash](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452) from a string. This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452),
[Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405),
[Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410),
[Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959),
[Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060).
This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
Note that Java only supports calculating the hash of signed integers, so if you want to calculate the hash of an unsigned integer you must cast it to the proper signed ClickHouse type (see the sketch after the examples below).
**Syntax**
@ -312,6 +319,20 @@ A `Int32` data type hash value.
Query:
```sql
SELECT javaHash(toInt32(123));
```
Result:
```response
┌─javaHash(toInt32(123))─┐
│ 123 │
└────────────────────────┘
```
Query:
```sql
SELECT javaHash('Hello, world!');
```
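A hedged sketch of the unsigned-to-signed cast mentioned above (`toInt32` wraps the out-of-range value; as the first example shows, the hash of an `Int32` equals the value itself, matching Java's `Integer.hashCode`):
```sql
-- 4294967290 = 2^32 - 6, so the UInt32 value wraps to the signed value -6
SELECT javaHash(toInt32(toUInt32(4294967290)));
-- returns -6
```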

View File

@ -1818,11 +1818,6 @@ Result:
└──────────────────────────────────────────────────┘
```
## modelEvaluate(model_name, …)
Evaluate external model.
Accepts a model name and model arguments. Returns Float64.
## catboostEvaluate(path_to_model, feature_1, feature_2, …, feature_n)
Evaluate external catboost model. [CatBoost](https://catboost.ai) is an open-source gradient boosting library developed by Yandex for machine learning.

View File

@ -565,6 +565,10 @@ Result:
└────────────────────────────┘
```
## tryBase58Decode(s)
Similar to base58Decode, but returns an empty string in case of error.
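A hedged round-trip sketch; the characters `0`, `O`, `I`, and `l` are absent from the Base58 alphabet, so the second call cannot decode its input and yields an empty string:
```sql
SELECT
    tryBase58Decode(base58Encode('test')) AS ok,  -- 'test'
    tryBase58Decode('0OIl') AS failed;            -- ''
```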
## base64Encode(s)
Encodes s string into base64
@ -579,7 +583,7 @@ Alias: `FROM_BASE64`.
## tryBase64Decode(s)
Similar to base64Decode, but in case of error an empty string would be returned.
Similar to base64Decode, but returns an empty string in case of error.
## endsWith(s, suffix)

View File

@ -6,7 +6,7 @@ sidebar_label: VIEW
# CREATE VIEW
Creates a new view. Views can be [normal](#normal), [materialized](#materialized), [live](#live-view), and [window](#window-view) (live view and window view are experimental features).
Creates a new view. Views can be [normal](#normal-view), [materialized](#materialized-view), [live](#live-view-experimental), and [window](#window-view-experimental) (live view and window view are experimental features).
## Normal View

View File

@ -362,7 +362,7 @@ SHOW ACCESS
Returns a list of clusters. All available clusters are listed in the [system.clusters](../../operations/system-tables/clusters.md) table.
:::note
The `SHOW CLUSTER name` query displays the contents of the system.clusters table for this cluster.
:::
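For example (the cluster name here is an assumption; pick one returned by `SHOW CLUSTERS`):
```sql
SHOW CLUSTER 'test_shard_localhost';
```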
@ -493,6 +493,20 @@ Result:
└──────────────────┴────────┴─────────────┘
```
## SHOW FILESYSTEM CACHES
```sql
SHOW FILESYSTEM CACHES
```
Result:
``` text
┌─Caches────┐
│ s3_cache │
└───────────┘
```
**See Also**
- [system.settings](../../operations/system-tables/settings.md) table

View File

@ -159,7 +159,7 @@ Provides possibility to stop background merges for tables in the MergeTree famil
SYSTEM STOP MERGES [ON VOLUME <volume_name> | [db.]merge_tree_family_table_name]
```
:::note
`DETACH / ATTACH` table will start background merges for the table even if merges have been stopped for all MergeTree tables before.
:::
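For example, a sketch with a hypothetical volume name:
```sql
SYSTEM STOP MERGES ON VOLUME volume_main;
-- Re-enable later with:
SYSTEM START MERGES ON VOLUME volume_main;
```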
@ -303,7 +303,7 @@ One may execute query after:
Replica attaches locally found parts and sends info about them to Zookeeper.
Parts present on a replica before metadata loss are not re-fetched from other replicas unless they are outdated (so replica restoration does not mean re-downloading all data over the network).
:::warning
Parts in all states are moved to `detached/` folder. Parts active before data loss (committed) are attached.
:::
@ -345,3 +345,11 @@ SYSTEM RESTORE REPLICA test ON CLUSTER cluster;
### RESTART REPLICAS
Reinitializes the ZooKeeper session state for all `ReplicatedMergeTree` tables, compares the current state with ZooKeeper as the source of truth, and adds tasks to the ZooKeeper queue if needed
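For example:
```sql
SYSTEM RESTART REPLICAS
```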
### DROP FILESYSTEM CACHE
Drops the filesystem cache.
```sql
SYSTEM DROP FILESYSTEM CACHE
```

View File

@ -6,11 +6,50 @@ slug: /ru/development/integrating_rust_libraries
Library integration will be described using the work done for the BLAKE3 library as an example.
The first step of the integration is creating a fork of the library in order to make further changes to its Rust methods for compatibility with C/C++.
The first step of the integration is adding the library to the /rust folder. To do this, create an empty Rust project in that folder and reference the required library in Cargo.toml. The new library must also be compiled as a static one, which requires adding `crate-type = ["staticlib"]` to Cargo.toml.
In the fork you will need to change the Cargo.toml configuration, switching the target to a static library. In addition, the cbindgen crate has to be added for later use during the build.
Next, the library has to be hooked up to CMake. For this, the Corrosion library was added to ClickHouse. The first step is registering the folder with the new library in the root CMakeLists.txt of the /rust folder. After that, add a CMakeLists.txt file to the library directory that calls a Corrosion function. As an example, here is the file from BLAKE3:
You also need to create or edit the build script build.rs, adding to it an invocation of cbindgen, the autogenerator of .h header files. An example of such an invocation can be seen in build.rs for BLAKE3:
```
corrosion_import_crate(MANIFEST_PATH Cargo.toml NO_STD)
target_include_directories(_ch_rust_blake3 INTERFACE include)
add_library(ch_rust::blake3 ALIAS _ch_rust_blake3)
```
This way, we use Corrosion to create a correct CMake target and then re-expose it under a clearer name. Note that the name `_ch_rust_blake3` comes from Cargo.toml, where it serves as the project name (`name = "_ch_rust_blake3"`).
Since Rust data types are not compatible with C/C++ data types, the project defines an interface of shim methods that convert the data coming from C/C++, call the library methods, and then convert the returned data back. In particular, consider the following method written for BLAKE3:
```
#[no_mangle]
pub unsafe extern "C" fn blake3_apply_shim(
begin: *const c_char,
_size: u32,
out_char_data: *mut u8,
) -> *mut c_char {
if begin.is_null() {
let err_str = CString::new("input was a null pointer").unwrap();
return err_str.into_raw();
}
let mut hasher = blake3::Hasher::new();
let input_bytes = CStr::from_ptr(begin);
let input_res = input_bytes.to_bytes();
hasher.update(input_res);
let mut reader = hasher.finalize_xof();
reader.fill(std::slice::from_raw_parts_mut(out_char_data, blake3::OUT_LEN));
std::ptr::null_mut()
}
```
The method takes as input a string in a C-compatible format, its size, and a pointer that the result will be written to. In addition, to be able to report an error, the method returns the error text as its result (and a null pointer if there were no errors). C-compatible types are not used in the BLAKE3 methods, so they are converted via the corresponding structures and methods into formats native to Rust. Then the original library methods are run. Their result would need to be converted back into C-compatible structures, but in this case the reverse conversion can be avoided, since the library supports writing directly through the *mut u8 pointer.
It is also worth noting that the #[no_mangle] attribute and the extern "C" specifier are mandatory for all such methods. Without them, a correct C/C++-compatible compilation is impossible. They are also required for the next stage of the integration.
After writing the code of the shim methods, we need to prepare a header file for the library. This can be done by hand, or cbindgen can be used for autogeneration. When using cbindgen, we will need to write the build script build.rs and add cbindgen as a build-dependency.
An example of a build script that can autogenerate the header file:
```
let crate_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
@ -28,39 +67,7 @@ slug: /ru/development/integrating_rust_libraries
}
```
The script sets the directory where the header file will be created and at the end runs the cbindgen generation method.
If problems arise with header generation, you may need to tweak the cbindgen configuration via the cbindgen.toml file, starting from the original template by the cbindgen developer: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml).
Next, the library has to be hooked up to CMake. For BLAKE3, two files were created for this: CMakeLists.txt and a file containing a function to run cargo build as a target, build_rust_lib.cmake. The latter should be copied into the library being integrated and edited in accordance with the required build parameters: add flags or any settings for different architectures.
Having finished the CMake setup, we can move on to creating the shim methods that make the library compatible with the rest of the ClickHouse code. In particular, consider the following method written for BLAKE3:
```
#[no_mangle]
pub unsafe extern "C" fn blake3_apply_shim(
begin: *const c_char,
_size: u32,
out_char_data: *mut u8,
) -> *mut c_char {
if begin.is_null() {
let err_str = CString::new("input was a null pointer").unwrap();
return err_str.into_raw();
}
let mut hasher = Hasher::new();
let input_bytes = CStr::from_ptr(begin);
let input_res = input_bytes.to_bytes();
hasher.update(input_res);
let mut reader = hasher.finalize_xof();
reader.fill(std::slice::from_raw_parts_mut(out_char_data, OUT_LEN));
std::ptr::null_mut()
}
```
The method takes as input a string in a C-compatible format, its size, and a pointer that the result will be written to. In addition, to be able to report an error, the method returns the error text as its result (and a null pointer if there were no errors). C-compatible types are not used in the BLAKE3 methods, so they are converted via the corresponding structures and methods into formats native to Rust. Then the original library methods are run. Their result would need to be converted back into C-compatible structures, but in this case the reverse conversion can be avoided, since the library supports writing directly through the *mut u8 pointer.
It is also worth noting that the #[no_mangle] attribute and the extern "C" specifier are mandatory for all such methods. Without them, a correct C/C++-compatible compilation and header autogeneration are impossible.
After these steps, you can test the compilation and operation of the methods on a small project to uncover incompatibilities and errors. If problems arise with header generation, you may need to tweak the cbindgen configuration via the cbindgen.toml file, which can be found either in BLAKE3 or by taking the original template by the cbindgen developer: [https://github.com/eqrion/cbindgen/blob/master/template.toml](https://github.com/eqrion/cbindgen/blob/master/template.toml).
In conclusion, it is worth noting a couple of problems that came up during the BLAKE3 integration:
1) Some architectures may require compilation tweaks in build.rs and in build_rust_lib.cmake due to their peculiarities.
2) MemorySanitizer does not understand memory initialization in Rust well, so to avoid false positives an alternative method was created for BLAKE3 that initializes memory more explicitly, although more slowly. It is compiled only for the MemorySanitizer build and does not make it into the release. More elegant ways to solve this problem probably exist, but they were not found during the BLAKE3 integration.
In conclusion, it is worth noting a problem that had to be dealt with during the BLAKE3 integration:
The C++ MemorySanitizer does not understand memory initialization in Rust well, so to avoid false positives an alternative method was created for BLAKE3 that initializes memory more explicitly, although more slowly. It is compiled only for the MemorySanitizer build and does not make it into the release. More elegant ways to solve this problem probably exist, but they were not found during the BLAKE3 integration.

View File

@ -22,17 +22,17 @@ ClickHouse allows sending the server the data,
There may be several such sections, one per table being transferred.
**external** - marker of the beginning of a section.
**file** - path to the file with the table dump, or -, which means stdin.
Only a single table can be read from stdin.
- **--external** - marker of the beginning of a section.
- **--file** - path to the file with the table dump, or `-`, which means `stdin`.
Only a single table can be read from `stdin`.
The following parameters are optional:
**name** - name of the table. If omitted, _data is used.
**format** - data format in the file. If omitted, TabSeparated is used.
- **--name** - name of the table. If omitted, _data is used.
- **--format** - data format in the file. If omitted, TabSeparated is used.
One of the following parameters must be specified:
**types** - comma-separated list of column types. For example, `UInt64,String`. The columns will be named _1, _2, …
**structure** - table structure in the form `UserID UInt64`, `URL String`. Defines the column names and types.
- **--types** - comma-separated list of column types. For example, `UInt64,String`. The columns will be named _1, _2, …
- **--structure** - table structure in the form `UserID UInt64`, `URL String`. Defines the column names and types.
The files specified in file will be parsed using the format specified in format, with the data types specified in types or structure. The table will be uploaded to the server and will be accessible there as a temporary table with the name given in name.
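For illustration, a sketch of passing a table via `stdin` (the query, table name, and structure here are hypothetical):
```bash
echo -ne "1\n2\n3\n" | clickhouse-client \
    --query "SELECT count() FROM temp_table" \
    --external --file=- --name=temp_table --structure="x UInt64"
```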

View File

@ -1342,12 +1342,13 @@ TCP port for secure communication with clients
If no policy is set, [tmp_path](#tmp-path) is used. Otherwise `tmp_path` is ignored.
:::note "Note"
- `move_factor` is ignored.
- `keep_free_space_bytes` is ignored.
- `max_data_part_size_bytes` is ignored.
- This policy must have exactly one volume.
:::
:::note "Note"
- `move_factor` is ignored.
- `keep_free_space_bytes` is ignored.
- `max_data_part_size_bytes` is ignored.
- This policy must have exactly one volume, containing only local disks.
:::
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}
Cache size (in bytes) for uncompressed data used by table engines of the [MergeTree](../../operations/server-configuration-parameters/settings.md) family.

View File

@ -3799,17 +3799,6 @@ Exception: Total regexp lengths too large.
Default value: `1`.
## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}
Enables or disables returning results of type `Date32` with extended range (compared to the type `Date`) for the functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth).
Possible values:
- 0 — Functions return `Date` for all argument types.
- 1 — Functions return `Date32` for `Date32` or `DateTime64` arguments and `Date` otherwise.
Default value: `0`.
**Example**
Query:
@ -3832,6 +3821,19 @@ SELECT * FROM positional_arguments ORDER BY 2,3;
└─────┴─────┴───────┘
```
## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}
Enables or disables returning results of type:
- `Date32` with extended range (compared to the type `Date`) for the functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth).
- `DateTime64` with extended range (compared to the type `DateTime`) for the functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md#timeslot).
Possible values:
- 0 — Functions return `Date` or `DateTime` for all argument types.
- 1 — Functions return `Date32` or `DateTime64` for `Date32` or `DateTime64` arguments and `Date` or `DateTime` otherwise.
Default value: `0`.
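A minimal sketch of the effect (the exact output may vary by version):
```sql
SET enable_extended_results_for_datetime_functions = 1;
SELECT toStartOfHour(toDateTime64('2141-06-15 12:30:00', 3)) AS t, toTypeName(t);
-- 2141-06-15 12:00:00 with a DateTime64 type instead of an out-of-range DateTime
```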
## optimize_move_to_prewhere {#optimize_move_to_prewhere}
Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries.

View File

@ -5,7 +5,7 @@ sidebar_position: 103
# anyHeavy {#anyheavyx}
Selects a frequently occurring value using the «[heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf)» algorithm. If there is a value that occurs more than half the time in each execution thread of the query, this value is returned. In general, the result is nondeterministic.
Selects a frequently occurring value using the «[heavy hitters](https://doi.org/10.1145/762471.762473)» algorithm. If there is a value that occurs more than half the time in each execution thread of the query, this value is returned. In general, the result is nondeterministic.
``` sql
anyHeavy(column)

View File

@ -7,7 +7,7 @@ sidebar_position: 108
Returns an array of the most frequent values in the specified column. The resulting array is sorted in descending order of the approximate frequency of the values (not by the values themselves).
Implements the [Filtered Space-Saving](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) algorithm for TopK analysis, based on the reduce-and-combine algorithm from the [Parallel Space Saving](https://arxiv.org/pdf/1401.0702.pdf) approach.
Implements the [Filtered Space-Saving](https://doi.org/10.1016/j.ins.2010.08.024) algorithm for TopK analysis, based on the reduce-and-combine algorithm from the [Parallel Space Saving](https://doi.org/10.1016/j.ins.2015.09.003) approach.
``` sql
topK(N)(column)

View File

@ -268,16 +268,19 @@ SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp;
```
:::note
The return type of the functions `toStartOf*`, `toLastDayOfMonth`, `toMonday` described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions), which is `0` by default.
The return type of the functions `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions), which is `0` by default.
Behavior for
* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOf*`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Although these functions can take values of type `Date32` or `DateTime64` as an argument, passing an argument outside the normal range of values (`1970` - `2148` for `Date` and `1970-01-01 00:00:00`-`2106-02-07 08:28:15` for `DateTime`) will produce an incorrect result.
* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOf*`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Although these functions can take values of type `Date32` or `DateTime64` as an argument, passing an argument outside the normal range of values (`1970` - `2148` for `Date` and `1970-01-01 00:00:00`-`2106-02-07 08:28:15` for `DateTime`) will produce an incorrect result.
If the argument value is outside the normal range:
* `1970-01-01 (00:00:00)` will be returned for moments of time before 1970,
* `2106-02-07 08:28:15` will be used as the argument if the actual argument exceeds this value and the return type is `DateTime`,
* `2149-06-06` will be used as the argument if the actual argument exceeds this value and the return type is `Date`,
* `2149-05-31` will be the result of `toLastDayOfMonth` for arguments greater than `2149-05-31`.
* `enable_extended_results_for_datetime_functions = 1`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and they return `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`.
* `enable_extended_results_for_datetime_functions = 1`:
  * Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and they return `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`.
  * Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime` if their argument is a `Date` or `DateTime`, and they return `DateTime64` if their argument is a `Date32` or `DateTime64`.
:::
## toStartOfYear {#tostartofyear}

View File

@ -1722,12 +1722,6 @@ SELECT joinGet(db_test.id_val,'val',toUInt32(number)) from numbers(4) SETTINGS j
└──────────────────────────────────────────────────┘
```
## modelEvaluate(model_name, …) {#function-modelevaluate}
Evaluates an external model.
Takes a model name and model arguments. Returns Float64.
## throwIf(x\[, message\[, error_code\]\]) {#throwifx-custom-message}
Throws an exception if the argument is not zero.

View File

@ -32,7 +32,7 @@ sidebar_label: FROM
Queries that use `FINAL` execute slightly more slowly than similar queries without it, because:
- Data is merged during query execution.
- Data is merged in memory during query execution, and this does not lead to a physical merge of the parts on disk.
- Queries with the `FINAL` modifier read primary key columns in addition to the columns used in the query.
**In most cases, avoid using `FINAL`.** The common approach is to use aggregating queries that assume the background processes of the `MergeTree`-family engines have not happened yet (for example, discarding duplicates themselves), as in the sketch below. {## TODO: examples ##}
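A hedged sketch of that approach (the table and the `key`/`value`/`version` columns are hypothetical), picking the latest row per key without `FINAL`:
```sql
-- Deduplicate a ReplacingMergeTree-style table at query time:
SELECT key, argMax(value, version) AS value
FROM table_with_duplicates
GROUP BY key;
```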

View File

@ -5,7 +5,7 @@ sidebar_position: 103
# anyHeavy {#anyheavyx}
Selects a frequently occurring value using the [heavy hitters](http://www.cs.umd.edu/~samir/498/karp.pdf) algorithm. If a value occurs in more than half of the cases in each execution thread of the query, this value is returned. In general, the result is nondeterministic.
Selects a frequently occurring value using the [heavy hitters](https://doi.org/10.1145/762471.762473) algorithm. If a value occurs in more than half of the cases in each execution thread of the query, this value is returned. In general, the result is nondeterministic.
``` sql
anyHeavy(column)

View File

@ -7,7 +7,7 @@ sidebar_position: 108
Returns an array of the approximately most frequent values in the specified column. The resulting array is sorted in descending order of the approximate frequency of the values (not by the values themselves).
Implements the [Filtered Space-Saving](http://www.l2f.inesc-id.pt/~fmmb/wiki/uploads/Work/misnis.ref0a.pdf) algorithm, using a reduce-and-combine approach borrowed from [Parallel Space Saving](https://arxiv.org/pdf/1401.0702.pdf).
Implements the [Filtered Space-Saving](https://doi.org/10.1016/j.ins.2010.08.024) algorithm, using a reduce-and-combine approach borrowed from [Parallel Space Saving](https://doi.org/10.1016/j.ins.2015.09.003).
**Syntax**

View File

@ -625,11 +625,6 @@ ORDER BY k ASC
Fetches data from a table with the Join engine using the specified join key.
## modelEvaluate(model_name, …) {#function-modelevaluate}
Evaluates an external model.
Takes a model name and model arguments. Returns a Float64 value.
## throwIf(x) {#throwifx}
Throws an exception if the argument is not zero.

View File

@ -7,12 +7,7 @@ include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake)
# The `clickhouse` binary is a multi purpose tool that contains multiple execution modes (client, server, etc.),
# each of them may be built and linked as a separate library.
# If you do not know what modes you need, turn this option OFF and enable SERVER and CLIENT only.
if (USE_MUSL)
# Only clickhouse-keeper can be built with musl currently
option (ENABLE_CLICKHOUSE_ALL "Enable all ClickHouse modes by default" OFF)
else ()
option (ENABLE_CLICKHOUSE_ALL "Enable all ClickHouse modes by default" ON)
endif ()
option (ENABLE_CLICKHOUSE_ALL "Enable all ClickHouse modes by default" ON)
option (ENABLE_CLICKHOUSE_SERVER "Server mode (main mode)" ${ENABLE_CLICKHOUSE_ALL})
option (ENABLE_CLICKHOUSE_CLIENT "Client mode (interactive tui/shell that connects to the server)"

View File

@ -842,6 +842,7 @@ void Client::addOptions(OptionsDescription & options_description)
("no-warnings", "disable warnings when client connects to server")
("fake-drop", "Ignore all DROP queries, should be used only for testing")
("accept-invalid-certificate", "Ignore certificate verification errors, equal to config parameters openSSL.client.invalidCertificateHandler.name=AcceptCertificateHandler and openSSL.client.verificationMode=none")
;
/// Commandline options related to external tables.
@ -976,6 +977,13 @@ void Client::processOptions(const OptionsDescription & options_description,
config().setBool("no-warnings", true);
if (options.count("fake-drop"))
fake_drop = true;
if (options.count("accept-invalid-certificate"))
{
config().setString("openSSL.client.invalidCertificateHandler.name", "AcceptCertificateHandler");
config().setString("openSSL.client.verificationMode", "none");
}
else
config().setString("openSSL.client.invalidCertificateHandler.name", "RejectCertificateHandler");
if ((query_fuzzer_runs = options["query-fuzzer-runs"].as<int>()))
{

View File

@ -33,7 +33,7 @@ install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-ke
add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)
if (BUILD_STANDALONE_KEEPER)
# Sraight list of all required sources
# Straight list of all required sources
set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp
@ -92,6 +92,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/BaseDaemon.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/SentryWriter.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Daemon/GraphiteWriter.cpp
${CMAKE_CURRENT_BINARY_DIR}/../../src/Daemon/GitHash.generated.cpp
Keeper.cpp
TinyContext.cpp

View File

@ -490,8 +490,9 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
void Keeper::logRevision() const
{
Poco::Logger::root().information("Starting ClickHouse Keeper " + std::string{VERSION_STRING}
+ " with revision " + std::to_string(ClickHouseRevision::getVersionRevision())
+ ", " + build_id_info
+ "(revision : " + std::to_string(ClickHouseRevision::getVersionRevision())
+ ", git hash: " + (git_hash.empty() ? "<unknown>" : git_hash)
+ ", build id: " + (build_id.empty() ? "<unknown>" : build_id) + ")"
+ ", PID " + std::to_string(getpid()));
}

View File

@ -203,7 +203,7 @@ void LocalServer::tryInitPath()
global_context->setPath(path);
global_context->setTemporaryStorage(path + "tmp");
global_context->setTemporaryStorage(path + "tmp", "", 0);
global_context->setFlagsPath(path + "flags");
global_context->setUserFilesPath(""); // user's files are everywhere

View File

@ -345,7 +345,7 @@ struct Checker
;
#ifndef DISABLE_HARMFUL_ENV_VAR_CHECK
#if !defined(DISABLE_HARMFUL_ENV_VAR_CHECK) && !defined(USE_MUSL)
/// NOTE: We will migrate to full static linking or our own dynamic loader to make this code obsolete.
void checkHarmfulEnvironmentVariables(char ** argv)
{
@ -408,6 +408,7 @@ void checkHarmfulEnvironmentVariables(char ** argv)
/// 3rd-party uncontrolled dangerous libraries into the process address space,
/// because it is insane.
#if !defined(USE_MUSL)
extern "C"
{
void * dlopen(const char *, int)
@ -430,6 +431,7 @@ extern "C"
return "ClickHouse does not allow dynamic library loading";
}
}
#endif
/// This allows to implement assert to forbid initialization of a class in static constructors.
@ -455,7 +457,7 @@ int main(int argc_, char ** argv_)
/// Note: we forbid dlopen in our code.
updatePHDRCache();
#ifndef DISABLE_HARMFUL_ENV_VAR_CHECK
#if !defined(DISABLE_HARMFUL_ENV_VAR_CHECK) && !defined(USE_MUSL)
checkHarmfulEnvironmentVariables(argv_);
#endif

View File

@ -209,7 +209,7 @@ try
fs::remove(it->path());
}
else
LOG_DEBUG(log, "Skipped file in temporary path {}", it->path().string());
LOG_DEBUG(log, "Found unknown file in temporary path {}", it->path().string());
}
}
catch (...)
@ -971,7 +971,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
{
std::string tmp_path = config().getString("tmp_path", path / "tmp/");
std::string tmp_policy = config().getString("tmp_policy", "");
const VolumePtr & volume = global_context->setTemporaryStorage(tmp_path, tmp_policy);
size_t tmp_max_size = config().getUInt64("tmp_max_size", 0);
const VolumePtr & volume = global_context->setTemporaryStorage(tmp_path, tmp_policy, tmp_max_size);
for (const DiskPtr & disk : volume->getDisks())
setupTmpPath(log, disk->getPath());
}
@ -1118,7 +1119,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
size_t max_server_memory_usage = config->getUInt64("max_server_memory_usage", 0);
double max_server_memory_usage_to_ram_ratio = config->getDouble("max_server_memory_usage_to_ram_ratio", 0.9);
size_t default_max_server_memory_usage = memory_amount * max_server_memory_usage_to_ram_ratio;
size_t default_max_server_memory_usage = static_cast<size_t>(memory_amount * max_server_memory_usage_to_ram_ratio);
if (max_server_memory_usage == 0)
{
@ -1391,7 +1392,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
/// Lower cache size on low-memory systems.
double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
size_t max_cache_size = memory_amount * cache_size_to_ram_max_ratio;
size_t max_cache_size = static_cast<size_t>(memory_amount * cache_size_to_ram_max_ratio);
/// Size of cache for uncompressed blocks. Zero means disabled.
String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", "");
@ -1474,23 +1475,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
/// try set up encryption. There are some errors in config, error will be printed and server wouldn't start.
CompressionCodecEncrypted::Configuration::instance().load(config(), "encryption_codecs");
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
if (config().has("disable_internal_dns_cache") && config().getInt("disable_internal_dns_cache"))
{
/// Disable DNS caching at all
DNSResolver::instance().setDisableCacheFlag();
LOG_DEBUG(log, "DNS caching disabled");
}
else
{
/// Initialize a watcher periodically updating DNS cache
dns_cache_updater = std::make_unique<DNSCacheUpdater>(
global_context, config().getInt("dns_cache_update_period", 15), config().getUInt("dns_max_consecutive_failures", 5));
}
if (dns_cache_updater)
dns_cache_updater->start();
SCOPE_EXIT({
/// Stop reloading of the main config. This must be done before `global_context->shutdown()` because
/// otherwise the reloading may pass a changed config to some destroyed parts of ContextSharedPart.
@ -1547,6 +1531,27 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_DEBUG(log, "Destroyed global context.");
});
/// DNSCacheUpdater uses BackgroundSchedulePool which lives in shared context
/// and thus this object must be created after the SCOPE_EXIT object where shared
/// context is destroyed.
/// In addition this object has to be created before the loading of the tables.
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
if (config().has("disable_internal_dns_cache") && config().getInt("disable_internal_dns_cache"))
{
/// Disable DNS caching at all
DNSResolver::instance().setDisableCacheFlag();
LOG_DEBUG(log, "DNS caching disabled");
}
else
{
/// Initialize a watcher periodically updating DNS cache
dns_cache_updater = std::make_unique<DNSCacheUpdater>(
global_context, config().getInt("dns_cache_update_period", 15), config().getUInt("dns_max_consecutive_failures", 5));
}
if (dns_cache_updater)
dns_cache_updater->start();
/// Set current database name before loading tables and databases because
/// system logs may copy global context.
global_context->setCurrentDatabaseNameInGlobalContext(default_database);
@ -1614,7 +1619,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
}
double total_memory_tracker_sample_probability = config().getDouble("total_memory_tracker_sample_probability", 0);
if (total_memory_tracker_sample_probability)
if (total_memory_tracker_sample_probability > 0.0)
{
total_memory_tracker.setSampleProbability(total_memory_tracker_sample_probability);
}

View File

@ -0,0 +1,4 @@
<clickhouse>
<path_to_regions_hierarchy_file>config.d/regions_hierarchy.txt</path_to_regions_hierarchy_file>
<path_to_regions_names_files>config.d/</path_to_regions_names_files>
</clickhouse>

View File

@ -0,0 +1 @@
../../../tests/config/regions_hierarchy.txt

View File

@ -0,0 +1 @@
../../../tests/config/regions_names_en.txt

View File

@ -462,8 +462,9 @@
<tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
<!-- Disable AuthType plaintext_password and no_password for ACL. -->
<!-- <allow_plaintext_password>0</allow_plaintext_password> -->
<!-- <allow_no_password>0</allow_no_password> -->
<allow_plaintext_password>1</allow_plaintext_password>
<allow_no_password>1</allow_no_password>
<allow_implicit_no_password>1</allow_implicit_no_password>
<!-- Policy from the <storage_configuration> for the temporary files.
If not set <tmp_path> is used, otherwise <tmp_path> is ignored.
@ -1113,10 +1114,6 @@
<asynchronous_metric_log>
<database>system</database>
<table>asynchronous_metric_log</table>
<!--
Asynchronous metrics are updated once a minute, so there is
no need to flush more often.
-->
<flush_interval_milliseconds>7000</flush_interval_milliseconds>
</asynchronous_metric_log>

View File

@ -820,7 +820,7 @@ async function draw(idx, chart, url_params, query) {
sync.sub(plots[idx]);
/// Set title
const title = queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] );
const title = queries[idx].title ? queries[idx].title.replaceAll(/\{(\w+)\}/g, (_, name) => params[name] ) : '';
chart.querySelector('.title').firstChild.data = title;
}

View File

@ -58,7 +58,9 @@ void processFile(const fs::path & file_path, const fs::path & dst_path, bool tes
}
else
{
auto src_buf = createReadBufferFromFileBase(file_path, {}, fs::file_size(file_path));
ReadSettings read_settings{};
read_settings.local_fs_method = LocalFSReadMethod::pread;
auto src_buf = createReadBufferFromFileBase(file_path, read_settings, fs::file_size(file_path));
std::shared_ptr<WriteBuffer> dst_buf;
/// test mode for integration tests.
@ -146,7 +148,7 @@ try
po::options_description description("Allowed options", getTerminalWidth());
description.add_options()
("help,h", "produce help message")
("metadata-path", po::value<std::string>(), "Metadata path (select data_paths from system.tables where name='table_name'")
("metadata-path", po::value<std::string>(), "Metadata path (SELECT data_paths FROM system.tables WHERE name = 'table_name' AND database = 'database_name')")
("test-mode", "Use test mode, which will put data on given url via PUT")
("link", "Create symlinks instead of copying")
("url", po::value<std::string>(), "Web server url for test mode")

View File

@ -162,6 +162,7 @@ void AccessControl::setUpFromMainConfig(const Poco::Util::AbstractConfiguration
if (config_.has("custom_settings_prefixes"))
setCustomSettingsPrefixes(config_.getString("custom_settings_prefixes"));
setImplicitNoPasswordAllowed(config_.getBool("allow_implicit_no_password", true));
setNoPasswordAllowed(config_.getBool("allow_no_password", true));
setPlaintextPasswordAllowed(config_.getBool("allow_plaintext_password", true));
@ -499,6 +500,15 @@ void AccessControl::checkSettingNameIsAllowed(const std::string_view setting_nam
custom_settings_prefixes->checkSettingNameIsAllowed(setting_name);
}
void AccessControl::setImplicitNoPasswordAllowed(bool allow_implicit_no_password_)
{
allow_implicit_no_password = allow_implicit_no_password_;
}
bool AccessControl::isImplicitNoPasswordAllowed() const
{
return allow_implicit_no_password;
}
void AccessControl::setNoPasswordAllowed(bool allow_no_password_)
{

View File

@ -134,6 +134,11 @@ public:
bool isSettingNameAllowed(const std::string_view name) const;
void checkSettingNameIsAllowed(const std::string_view name) const;
/// Allows implicit user creation without password (by default it's allowed).
/// In other words, allow 'CREATE USER' queries without 'IDENTIFIED WITH' clause.
void setImplicitNoPasswordAllowed(const bool allow_implicit_no_password_);
bool isImplicitNoPasswordAllowed() const;
/// Allows users without password (by default it's allowed).
void setNoPasswordAllowed(const bool allow_no_password_);
bool isNoPasswordAllowed() const;
@ -222,6 +227,7 @@ private:
std::unique_ptr<AccessChangesNotifier> changes_notifier;
std::atomic_bool allow_plaintext_password = true;
std::atomic_bool allow_no_password = true;
std::atomic_bool allow_implicit_no_password = true;
std::atomic_bool users_without_row_policies_can_read_rows = false;
std::atomic_bool on_cluster_queries_require_cluster_grant = false;
std::atomic_bool select_from_system_db_requires_grant = false;

View File

@ -25,7 +25,7 @@ enum class AccessType
M(SHOW_DICTIONARIES, "", DICTIONARY, SHOW) /* allows to execute SHOW DICTIONARIES, SHOW CREATE DICTIONARY, EXISTS <dictionary>;
implicitly enabled by any grant on the dictionary */\
M(SHOW, "", GROUP, ALL) /* allows to execute SHOW, USE, EXISTS, CHECK, DESCRIBE */\
M(SHOW_CACHES, "", GROUP, ALL) \
M(SHOW_FILESYSTEM_CACHES, "", GROUP, ALL) \
\
M(SELECT, "", COLUMN, ALL) \
M(INSERT, "", COLUMN, ALL) \

View File

@ -164,8 +164,10 @@ public:
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_dst_ptr = b.CreatePointerCast(b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_dst_ptr, denominator_offset), denominator_type->getPointerTo());
auto * denominator_src_ptr = b.CreatePointerCast(b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_src_ptr, denominator_offset), denominator_type->getPointerTo());
auto * ty_aggregate_data_dst_ptr = llvm::cast<llvm::PointerType>(aggregate_data_dst_ptr->getType()->getScalarType())->getElementType();
auto * denominator_dst_ptr = b.CreatePointerCast(b.CreateConstInBoundsGEP1_64(ty_aggregate_data_dst_ptr, aggregate_data_dst_ptr, denominator_offset), denominator_type->getPointerTo());
auto * ty_aggregate_data_src_ptr = llvm::cast<llvm::PointerType>(aggregate_data_src_ptr->getType()->getScalarType())->getElementType();
auto * denominator_src_ptr = b.CreatePointerCast(b.CreateConstInBoundsGEP1_64(ty_aggregate_data_src_ptr, aggregate_data_src_ptr, denominator_offset), denominator_type->getPointerTo());
auto * denominator_dst_value = b.CreateLoad(denominator_type, denominator_dst_ptr);
auto * denominator_src_value = b.CreateLoad(denominator_type, denominator_src_ptr);
@ -184,7 +186,8 @@ public:
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_ptr = b.CreatePointerCast(b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, denominator_offset), denominator_type->getPointerTo());
auto * ty_aggregate_data_ptr = llvm::cast<llvm::PointerType>(aggregate_data_ptr->getType()->getScalarType())->getElementType();
auto * denominator_ptr = b.CreatePointerCast(b.CreateConstGEP1_32(ty_aggregate_data_ptr, aggregate_data_ptr, denominator_offset), denominator_type->getPointerTo());
auto * denominator_value = b.CreateLoad(denominator_type, denominator_ptr);
auto * double_numerator = nativeCast<Numerator>(b, numerator_value, b.getDoubleTy());
@ -311,7 +314,8 @@ public:
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_ptr = b.CreatePointerCast(b.CreateConstGEP1_32(nullptr, aggregate_data_ptr, denominator_offset), denominator_type->getPointerTo());
auto * ty_aggregate_data_ptr = llvm::cast<llvm::PointerType>(aggregate_data_ptr->getType()->getScalarType())->getElementType();
auto * denominator_ptr = b.CreatePointerCast(b.CreateConstGEP1_32(ty_aggregate_data_ptr, aggregate_data_ptr, denominator_offset), denominator_type->getPointerTo());
auto * denominator_value_updated = b.CreateAdd(b.CreateLoad(denominator_type, denominator_ptr), llvm::ConstantInt::get(denominator_type, 1));
b.CreateStore(denominator_value_updated, denominator_ptr);
}

View File

@ -74,7 +74,8 @@ public:
auto * denominator_type = toNativeType<Denominator>(b);
static constexpr size_t denominator_offset = offsetof(Fraction, denominator);
auto * denominator_offset_ptr = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, denominator_offset);
auto * ty_aggregate_data_ptr = llvm::cast<llvm::PointerType>(aggregate_data_ptr->getType()->getScalarType())->getElementType();
auto * denominator_offset_ptr = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, denominator_offset);
auto * denominator_ptr = b.CreatePointerCast(denominator_offset_ptr, denominator_type->getPointerTo());
auto * weight_cast_to_denominator = nativeCast(b, arguments_types[1], argument_values[1], denominator_type);

View File

@ -75,6 +75,12 @@ namespace
/// We need to look inside the type of its argument to obtain it.
const DataTypeAggregateFunction & datatype_aggfunc = dynamic_cast<const DataTypeAggregateFunction &>(*argument_type_ptr);
AggregateFunctionPtr aggfunc = datatype_aggfunc.getFunction();
if (aggfunc->getName() != AggregateFunctionGroupBitmapData<UInt8>::name())
throw Exception(
"Illegal type " + argument_types[0]->getName() + " of argument for aggregate function " + name,
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
DataTypePtr nested_argument_type_ptr = aggfunc->getArgumentTypes()[0];
AggregateFunctionPtr res(createWithIntegerType<AggregateFunctionTemplate, AggregateFunctionGroupBitmapData>(

View File

@ -207,7 +207,8 @@ public:
if constexpr (result_is_nullable)
b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
auto * ty_aggregate_data_ptr = llvm::cast<llvm::PointerType>(aggregate_data_ptr->getType()->getScalarType())->getElementType();
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, { removeNullable(nullable_type) }, { wrapped_value });
b.CreateBr(join_block);
@ -218,10 +219,11 @@ public:
};
template <bool result_is_nullable, bool serialize_flag, bool null_is_skipped>
class AggregateFunctionIfNullVariadic final
: public AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullVariadic<result_is_nullable, serialize_flag, null_is_skipped>>
template <bool result_is_nullable, bool serialize_flag>
class AggregateFunctionIfNullVariadic final : public AggregateFunctionNullBase<
result_is_nullable,
serialize_flag,
AggregateFunctionIfNullVariadic<result_is_nullable, serialize_flag>>
{
public:
@ -259,7 +261,7 @@ public:
if (is_nullable[i])
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[i]);
if (null_is_skipped && nullable_col.isNullAt(row_num))
if (nullable_col.isNullAt(row_num))
{
/// If at least one column has a null value in the current row,
/// we don't process this row.
@ -293,7 +295,7 @@ public:
for (size_t i = row_begin; i < row_end; i++)
{
final_null_flags[i] = (null_is_skipped && filter_null_map[i]) || !filter_values[i];
final_null_flags[i] = filter_null_map[i] || !filter_values[i];
}
}
else
@ -310,7 +312,7 @@ public:
if (is_nullable[arg])
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[arg]);
if (null_is_skipped && (arg != filter_column_num))
if (arg != filter_column_num)
{
const ColumnUInt8 & nullmap_column = nullable_col.getNullMapColumn();
const UInt8 * col_null_map = nullmap_column.getData().data();
@ -368,9 +370,7 @@ public:
if (is_nullable[i])
{
auto * wrapped_value = b.CreateExtractValue(argument_value, {0});
if constexpr (null_is_skipped)
is_null_values[i] = b.CreateExtractValue(argument_value, {1});
is_null_values[i] = b.CreateExtractValue(argument_value, {1});
wrapped_values[i] = wrapped_value;
non_nullable_types[i] = removeNullable(arguments_types[i]);
@ -387,23 +387,20 @@ public:
auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
auto * join_block_after_null_checks = llvm::BasicBlock::Create(head->getContext(), "join_block_after_null_checks", head->getParent());
if constexpr (null_is_skipped)
auto * values_have_null_ptr = b.CreateAlloca(b.getInt1Ty());
b.CreateStore(b.getInt1(false), values_have_null_ptr);
for (auto * is_null_value : is_null_values)
{
auto * values_have_null_ptr = b.CreateAlloca(b.getInt1Ty());
b.CreateStore(b.getInt1(false), values_have_null_ptr);
if (!is_null_value)
continue;
for (auto * is_null_value : is_null_values)
{
if (!is_null_value)
continue;
auto * values_have_null = b.CreateLoad(b.getInt1Ty(), values_have_null_ptr);
b.CreateStore(b.CreateOr(values_have_null, is_null_value), values_have_null_ptr);
}
b.CreateCondBr(b.CreateLoad(b.getInt1Ty(), values_have_null_ptr), join_block, join_block_after_null_checks);
auto * values_have_null = b.CreateLoad(b.getInt1Ty(), values_have_null_ptr);
b.CreateStore(b.CreateOr(values_have_null, is_null_value), values_have_null_ptr);
}
b.CreateCondBr(b.CreateLoad(b.getInt1Ty(), values_have_null_ptr), join_block, join_block_after_null_checks);
b.SetInsertPoint(join_block_after_null_checks);
const auto & predicate_type = arguments_types[argument_values.size() - 1];
@ -423,7 +420,8 @@ public:
if constexpr (result_is_nullable)
b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
auto * ty_aggregate_data_ptr = llvm::cast<llvm::PointerType>(aggregate_data_ptr->getType()->getScalarType())->getElementType();
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, non_nullable_types, wrapped_values);
b.CreateBr(join_block);
@ -433,8 +431,10 @@ public:
#endif
private:
using Base = AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionIfNullVariadic<result_is_nullable, serialize_flag, null_is_skipped>>;
using Base = AggregateFunctionNullBase<
result_is_nullable,
serialize_flag,
AggregateFunctionIfNullVariadic<result_is_nullable, serialize_flag>>;
static constexpr size_t MAX_ARGS = 8;
size_t number_of_arguments = 0;
@ -473,14 +473,14 @@ AggregateFunctionPtr AggregateFunctionIf::getOwnNullAdapter(
{
if (return_type_is_nullable)
{
return std::make_shared<AggregateFunctionIfNullVariadic<true, true, true>>(nested_function, arguments, params);
return std::make_shared<AggregateFunctionIfNullVariadic<true, true>>(nested_function, arguments, params);
}
else
{
if (need_to_serialize_flag)
return std::make_shared<AggregateFunctionIfNullVariadic<false, true, true>>(nested_function, arguments, params);
return std::make_shared<AggregateFunctionIfNullVariadic<false, true>>(nested_function, arguments, params);
else
return std::make_shared<AggregateFunctionIfNullVariadic<false, false, true>>(nested_function, arguments, params);
return std::make_shared<AggregateFunctionIfNullVariadic<false, false>>(nested_function, arguments, params);
}
}
}

View File

@ -196,7 +196,7 @@ public:
const Array & params,
const AggregateFunctionProperties & /*properties*/) const override
{
return std::make_shared<AggregateFunctionNullVariadic<false, false, false>>(nested_function, arguments, params);
return std::make_shared<AggregateFunctionNullVariadic<false, false>>(nested_function, arguments, params);
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override

View File

@ -201,7 +201,8 @@ public:
static constexpr size_t value_offset_from_structure = offsetof(SingleValueDataFixed<T>, value);
auto * type = toNativeType<T>(builder);
auto * value_ptr_with_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, value_offset_from_structure);
auto * ty_aggregate_data_ptr = llvm::cast<llvm::PointerType>(aggregate_data_ptr->getType()->getScalarType())->getElementType();
auto * value_ptr_with_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, value_offset_from_structure);
auto * value_ptr = b.CreatePointerCast(value_ptr_with_offset, type->getPointerTo());
return value_ptr;

View File

@ -108,14 +108,14 @@ public:
{
if (return_type_is_nullable)
{
return std::make_shared<AggregateFunctionNullVariadic<true, true, true>>(nested_function, arguments, params);
return std::make_shared<AggregateFunctionNullVariadic<true, true>>(nested_function, arguments, params);
}
else
{
if (serialize_flag)
return std::make_shared<AggregateFunctionNullVariadic<false, true, true>>(nested_function, arguments, params);
return std::make_shared<AggregateFunctionNullVariadic<false, true>>(nested_function, arguments, params);
else
return std::make_shared<AggregateFunctionNullVariadic<false, true, false>>(nested_function, arguments, params);
return std::make_shared<AggregateFunctionNullVariadic<false, true>>(nested_function, arguments, params);
}
}
}

View File

@ -225,7 +225,8 @@ public:
if constexpr (result_is_nullable)
b.CreateMemSet(aggregate_data_ptr, llvm::ConstantInt::get(b.getInt8Ty(), 0), this->prefix_size, llvm::assumeAligned(this->alignOfData()));
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
auto * ty_aggregate_data_ptr = llvm::cast<llvm::PointerType>(aggregate_data_ptr->getType()->getScalarType())->getElementType();
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileCreate(b, aggregate_data_ptr_with_prefix_size_offset);
}
@ -235,16 +236,18 @@ public:
if constexpr (result_is_nullable)
{
auto * aggregate_data_is_null_dst_value = b.CreateLoad(aggregate_data_dst_ptr);
auto * aggregate_data_is_null_src_value = b.CreateLoad(aggregate_data_src_ptr);
auto * aggregate_data_is_null_dst_value = b.CreateLoad(aggregate_data_dst_ptr->getType()->getPointerElementType(), aggregate_data_dst_ptr);
auto * aggregate_data_is_null_src_value = b.CreateLoad(aggregate_data_src_ptr->getType()->getPointerElementType(), aggregate_data_src_ptr);
auto * is_src_null = nativeBoolCast(b, std::make_shared<DataTypeUInt8>(), aggregate_data_is_null_src_value);
auto * is_null_result_value = b.CreateSelect(is_src_null, llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_is_null_dst_value);
b.CreateStore(is_null_result_value, aggregate_data_dst_ptr);
}
auto * aggregate_data_dst_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_dst_ptr, this->prefix_size);
auto * aggregate_data_src_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_src_ptr, this->prefix_size);
auto * ty_aggregate_data_dst_ptr = llvm::cast<llvm::PointerType>(aggregate_data_dst_ptr->getType()->getScalarType())->getElementType();
auto * aggregate_data_dst_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_dst_ptr, aggregate_data_dst_ptr, this->prefix_size);
auto * ty_aggregate_data_src_ptr = llvm::cast<llvm::PointerType>(aggregate_data_src_ptr->getType()->getScalarType())->getElementType();
auto * aggregate_data_src_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_src_ptr, aggregate_data_src_ptr, this->prefix_size);
this->nested_function->compileMerge(b, aggregate_data_dst_ptr_with_prefix_size_offset, aggregate_data_src_ptr_with_prefix_size_offset);
}
@ -278,7 +281,8 @@ public:
b.CreateBr(join_block);
b.SetInsertPoint(if_not_null);
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
auto * ty_aggregate_data_ptr = llvm::cast<llvm::PointerType>(aggregate_data_ptr->getType()->getScalarType())->getElementType();
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, this->prefix_size);
auto * nested_result = this->nested_function->compileGetResult(builder, aggregate_data_ptr_with_prefix_size_offset);
b.CreateStore(b.CreateInsertValue(nullable_value, nested_result, {0}), nullable_value_ptr);
b.CreateBr(join_block);
@ -374,7 +378,8 @@ public:
if constexpr (result_is_nullable)
b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
auto * ty_aggregate_data_ptr = llvm::cast<llvm::PointerType>(aggregate_data_ptr->getType()->getScalarType())->getElementType();
auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, this->prefix_size);
this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, { removeNullable(nullable_type) }, { wrapped_value });
b.CreateBr(join_block);
@ -386,16 +391,17 @@ public:
};
template <bool result_is_nullable, bool serialize_flag, bool null_is_skipped>
class AggregateFunctionNullVariadic final
: public AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionNullVariadic<result_is_nullable, serialize_flag, null_is_skipped>>
template <bool result_is_nullable, bool serialize_flag>
class AggregateFunctionNullVariadic final : public AggregateFunctionNullBase<
result_is_nullable,
serialize_flag,
AggregateFunctionNullVariadic<result_is_nullable, serialize_flag>>
{
public:
AggregateFunctionNullVariadic(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
: AggregateFunctionNullBase<result_is_nullable, serialize_flag,
AggregateFunctionNullVariadic<result_is_nullable, serialize_flag, null_is_skipped>>(std::move(nested_function_), arguments, params),
number_of_arguments(arguments.size())
: AggregateFunctionNullBase<result_is_nullable, serialize_flag, AggregateFunctionNullVariadic<result_is_nullable, serialize_flag>>(
std::move(nested_function_), arguments, params)
, number_of_arguments(arguments.size())
{
if (number_of_arguments == 1)
throw Exception("Logical error: single argument is passed to AggregateFunctionNullVariadic", ErrorCodes::LOGICAL_ERROR);
@ -418,7 +424,7 @@ public:
if (is_nullable[i])
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[i]);
if (null_is_skipped && nullable_col.isNullAt(row_num))
if (nullable_col.isNullAt(row_num))
{
/// If at least one column has a null value in the current row,
/// we don't process this row.
@ -476,11 +482,8 @@ public:
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[i]);
nested_columns[i] = &nullable_col.getNestedColumn();
if constexpr (null_is_skipped)
{
const ColumnUInt8 & nullmap_column = nullable_col.getNullMapColumn();
nullable_filters.push_back(nullmap_column.getData().data());
}
const ColumnUInt8 & nullmap_column = nullable_col.getNullMapColumn();
nullable_filters.push_back(nullmap_column.getData().data());
}
else
{
@ -488,14 +491,7 @@ public:
}
}
/// We can have 0 nullable filters if we don't skip nulls
if (nullable_filters.size() == 0)
{
this->setFlag(place);
this->nested_function->addBatchSinglePlace(row_begin, row_end, this->nestedPlace(place), nested_columns, arena, -1);
return;
}
chassert(nullable_filters.size() > 0);
bool found_one = false;
if (nullable_filters.size() == 1)
{
@ -567,9 +563,7 @@ public:
if (is_nullable[i])
{
auto * wrapped_value = b.CreateExtractValue(argument_value, {0});
if constexpr (null_is_skipped)
is_null_values[i] = b.CreateExtractValue(argument_value, {1});
is_null_values[i] = b.CreateExtractValue(argument_value, {1});
wrapped_values[i] = wrapped_value;
non_nullable_types[i] = removeNullable(arguments_types[i]);
@ -581,48 +575,40 @@ public:
}
}
-        if constexpr (null_is_skipped)
-        {
-            auto * head = b.GetInsertBlock();
-            auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
-            auto * if_null = llvm::BasicBlock::Create(head->getContext(), "if_null", head->getParent());
-            auto * if_not_null = llvm::BasicBlock::Create(head->getContext(), "if_not_null", head->getParent());
-            auto * values_have_null_ptr = b.CreateAlloca(b.getInt1Ty());
-            b.CreateStore(b.getInt1(false), values_have_null_ptr);
-            for (auto * is_null_value : is_null_values)
-            {
-                if (!is_null_value)
-                    continue;
-                auto * values_have_null = b.CreateLoad(b.getInt1Ty(), values_have_null_ptr);
-                b.CreateStore(b.CreateOr(values_have_null, is_null_value), values_have_null_ptr);
-            }
-            b.CreateCondBr(b.CreateLoad(b.getInt1Ty(), values_have_null_ptr), if_null, if_not_null);
-            b.SetInsertPoint(if_null);
-            b.CreateBr(join_block);
-            b.SetInsertPoint(if_not_null);
-            if constexpr (result_is_nullable)
-                b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
-            auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
-            this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, arguments_types, wrapped_values);
-            b.CreateBr(join_block);
-            b.SetInsertPoint(join_block);
-        }
-        else
-        {
-            if constexpr (result_is_nullable)
-                b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
-            auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(nullptr, aggregate_data_ptr, this->prefix_size);
-            this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, non_nullable_types, wrapped_values);
-        }
+        auto * head = b.GetInsertBlock();
+        auto * join_block = llvm::BasicBlock::Create(head->getContext(), "join_block", head->getParent());
+        auto * if_null = llvm::BasicBlock::Create(head->getContext(), "if_null", head->getParent());
+        auto * if_not_null = llvm::BasicBlock::Create(head->getContext(), "if_not_null", head->getParent());
+        auto * values_have_null_ptr = b.CreateAlloca(b.getInt1Ty());
+        b.CreateStore(b.getInt1(false), values_have_null_ptr);
+        for (auto * is_null_value : is_null_values)
+        {
+            if (!is_null_value)
+                continue;
+            auto * values_have_null = b.CreateLoad(b.getInt1Ty(), values_have_null_ptr);
+            b.CreateStore(b.CreateOr(values_have_null, is_null_value), values_have_null_ptr);
+        }
+        b.CreateCondBr(b.CreateLoad(b.getInt1Ty(), values_have_null_ptr), if_null, if_not_null);
+        b.SetInsertPoint(if_null);
+        b.CreateBr(join_block);
+        b.SetInsertPoint(if_not_null);
+        if constexpr (result_is_nullable)
+            b.CreateStore(llvm::ConstantInt::get(b.getInt8Ty(), 1), aggregate_data_ptr);
+        auto * ty_aggregate_data_ptr = llvm::cast<llvm::PointerType>(aggregate_data_ptr->getType()->getScalarType())->getElementType();
+        auto * aggregate_data_ptr_with_prefix_size_offset = b.CreateConstInBoundsGEP1_64(ty_aggregate_data_ptr, aggregate_data_ptr, this->prefix_size);
+        this->nested_function->compileAdd(b, aggregate_data_ptr_with_prefix_size_offset, arguments_types, wrapped_values);
+        b.CreateBr(join_block);
+        b.SetInsertPoint(join_block);
    }
#endif
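For the JIT path, the generated IR ORs the per-argument null flags, branches to if_null (skip the row) or if_not_null (set the result flag byte and call the nested function's compiled add), and rejoins at join_block. Written as plain C++ for a single row, the emitted logic is roughly the following sketch; addRow and nested_add are stand-ins, not real APIs:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    void addRow(uint8_t * aggregate_data,
                const std::vector<bool> & is_null, // one flag per nullable argument
                void (*nested_add)(uint8_t *),     // stand-in for nested compileAdd
                std::size_t prefix_size,
                bool result_is_nullable)
    {
        bool values_have_null = false;
        for (bool flag : is_null)               // the OR-reduction over null flags
            values_have_null = values_have_null || flag;

        if (values_have_null)
            return;                             // "if_null" block: straight to join

        if (result_is_nullable)
            aggregate_data[0] = 1;              // flag byte: "saw a non-NULL value"

        nested_add(aggregate_data + prefix_size); // nested state follows the prefix
    }

    int main()
    {
        uint8_t state[16] = {};
        auto nested = +[](uint8_t * /*state*/) {};
        addRow(state, {false, true}, nested, 1, true);
        return state[0]; // still 0: the row contained a NULL, so it was skipped
    }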

View File

@ -199,17 +199,6 @@ public:
return this->getName() == rhs.getName() && this->haveEqualArgumentTypes(rhs);
}
-    AggregateFunctionPtr getOwnNullAdapter(
-        const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params,
-        const AggregateFunctionProperties &) const override
-    {
-        /// Even though some values are mapped to aggregating key, it could return nulls for the below case.
-        /// aggregated events: [A -> B -> C]
-        /// events to find: [C -> D]
-        /// [C -> D] is not matched to 'A -> B -> C' so that it returns null.
-        return std::make_shared<AggregateFunctionNullVariadic<false, false, true>>(nested_function, arguments, params);
-    }
void insert(Data & a, const Node * v, Arena * arena) const
{
++a.total_values;
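With this getOwnNullAdapter override gone, the function no longer pins itself to AggregateFunctionNullVariadic<false, false, true>; the factory's default null adapter applies. The deleted comment's point, that a pattern search can yield NULL even from non-NULL inputs because nothing matches, can be sketched like this (nextAfter is a hypothetical helper):

    #include <iostream>
    #include <optional>
    #include <string>

    std::optional<char> nextAfter(const std::string & events, const std::string & pattern)
    {
        auto pos = events.find(pattern);
        if (pos == std::string::npos || pos + pattern.size() >= events.size())
            return std::nullopt; // pattern not matched: the result is NULL
        return events[pos + pattern.size()];
    }

    int main()
    {
        std::cout << nextAfter("ABC", "AB").value_or('?') << '\n'; // C
        std::cout << nextAfter("ABC", "CD").has_value() << '\n';   // 0, i.e. NULL
    }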

View File

@ -161,7 +161,7 @@ private:
Y max_y = data.max_y;
Float64 diff_y = max_y - min_y;
-            if (diff_y)
+            if (diff_y != 0.0)
{
for (size_t i = 0; i <= diff_x; ++i)
{
@ -194,7 +194,7 @@ private:
auto upper_bound = [&](size_t bucket_num)
{
bound.second = (bucket_num + 1) * multiple_d;
-                bound.first = std::floor(bound.second);
+                bound.first = static_cast<size_t>(std::floor(bound.second));
};
upper_bound(cur_bucket_num);
for (size_t i = 0; i <= (diff_x + 1); ++i)
@ -249,7 +249,7 @@ private:
value += getBar(point_y ? 1 : 0);
};
-        if (diff_y)
+        if (diff_y != 0.0)
std::for_each(new_points.begin(), new_points.end(), get_bars);
else
std::for_each(new_points.begin(), new_points.end(), get_bars_for_constant);
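Both fixes in this file are about implicit conversions: `if (diff_y)` contextually converts a Float64 to bool, and std::floor returns double, which narrows when stored into a size_t. Writing the comparison and the cast explicitly states the intent and, presumably, satisfies the project's stricter conversion warnings. A compact illustration:

    #include <cmath>
    #include <cstddef>
    #include <iostream>

    int main()
    {
        double diff_y = 0.5;

        if (diff_y != 0.0)       // was: if (diff_y), an implicit double -> bool
            std::cout << "non-flat\n";

        double bound_second = 7.9;
        auto bound_first = static_cast<std::size_t>(std::floor(bound_second));
        std::cout << bound_first << '\n'; // 7, narrowing made explicit
    }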

View File

@ -225,7 +225,7 @@ public:
ResultType var_value = data.getPopulation();
if (var_value > 0)
-            dst.push_back(data.getMoment3() / pow(var_value, 1.5));
+            dst.push_back(static_cast<ResultType>(data.getMoment3() / pow(var_value, 1.5)));
else
dst.push_back(std::numeric_limits<ResultType>::quiet_NaN());
}
@ -234,7 +234,7 @@ public:
ResultType var_value = data.getSample();
if (var_value > 0)
-            dst.push_back(data.getMoment3() / pow(var_value, 1.5));
+            dst.push_back(static_cast<ResultType>(data.getMoment3() / pow(var_value, 1.5)));
else
dst.push_back(std::numeric_limits<ResultType>::quiet_NaN());
}
@ -243,7 +243,7 @@ public:
ResultType var_value = data.getPopulation();
if (var_value > 0)
-            dst.push_back(data.getMoment4() / pow(var_value, 2));
+            dst.push_back(static_cast<ResultType>(data.getMoment4() / pow(var_value, 2)));
else
dst.push_back(std::numeric_limits<ResultType>::quiet_NaN());
}
@ -252,7 +252,7 @@ public:
ResultType var_value = data.getSample();
if (var_value > 0)
-            dst.push_back(data.getMoment4() / pow(var_value, 2));
+            dst.push_back(static_cast<ResultType>(data.getMoment4() / pow(var_value, 2)));
else
dst.push_back(std::numeric_limits<ResultType>::quiet_NaN());
}
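These four hunks compute skewness (m3 / var^1.5) and kurtosis (m4 / var^2); pow() returns double, so pushing the quotient into a narrower ResultType column now casts explicitly. In isolation, assuming Float32 as the result type:

    #include <cmath>
    #include <iostream>
    #include <vector>

    int main()
    {
        using ResultType = float;
        double moment3 = 2.0, moment4 = 9.0, var_value = 3.0;

        std::vector<ResultType> dst;
        // Explicit casts: double results stored into a float column.
        dst.push_back(static_cast<ResultType>(moment3 / std::pow(var_value, 1.5)));
        dst.push_back(static_cast<ResultType>(moment4 / std::pow(var_value, 2)));

        std::cout << dst[0] << ' ' << dst[1] << '\n';
    }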

View File

@ -216,7 +216,7 @@ public:
for (size_t i = 0; i < keys_vec_size; ++i)
{
auto value = value_column[values_vec_offset + i];
-            auto key = key_column[keys_vec_offset + i].get<T>();
+            T key = static_cast<T>(key_column[keys_vec_offset + i].get<T>());
if (!keepKey(key))
continue;
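Here the Field accessor hands back the value widened to 64 bits (integers are stored widened inside Field), so `auto` deduced the wide type and the narrowing to T was implicit. A toy model of why the explicit T plus static_cast is needed; Field and get here are simplified stand-ins, not the real class:

    #include <cstdint>
    #include <iostream>
    #include <variant>

    struct Field
    {
        std::variant<uint64_t, int64_t, double> data;
        template <typename T>
        uint64_t get() const { return std::get<uint64_t>(data); } // widened storage
    };

    int main()
    {
        using T = uint16_t;
        Field f{uint64_t{70000}};

        // auto key = f.get<T>();           // would deduce uint64_t, not T
        T key = static_cast<T>(f.get<T>()); // explicit, warning-free narrowing
        std::cout << key << '\n';           // 4464: 70000 truncated to 16 bits
    }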

View File

@ -252,13 +252,6 @@ public:
bool allocatesMemoryInArena() const override { return false; }
-    AggregateFunctionPtr getOwnNullAdapter(
-        const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array & params,
-        const AggregateFunctionProperties & /*properties*/) const override
-    {
-        return std::make_shared<AggregateFunctionNullVariadic<false, false, false>>(nested_function, arguments, params);
-    }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, const size_t row_num, Arena *) const override
{
bool has_event = false;
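As with the sequence function above, deleting this getOwnNullAdapter override makes windowFunnel fall back to the generic null adapter, which after this change always skips NULL rows. A sketch of the dispatch idea, with hypothetical names throughout:

    #include <iostream>
    #include <memory>

    struct IAggregateFunction
    {
        virtual ~IAggregateFunction() = default;
        virtual const char * name() const = 0;
        // nullptr means "no custom adapter"; the factory then applies the default.
        virtual std::shared_ptr<IAggregateFunction> getOwnNullAdapter() const { return nullptr; }
    };

    struct WindowFunnel : IAggregateFunction
    {
        const char * name() const override { return "windowFunnel"; }
        // getOwnNullAdapter() override removed: the default wrapper applies.
    };

    struct NullSkippingWrapper : IAggregateFunction
    {
        std::shared_ptr<IAggregateFunction> nested;
        const char * name() const override { return "Null(...)"; }
    };

    std::shared_ptr<IAggregateFunction> wrapWithNull(std::shared_ptr<IAggregateFunction> f)
    {
        if (auto custom = f->getOwnNullAdapter())
            return custom;
        auto w = std::make_shared<NullSkippingWrapper>();
        w->nested = std::move(f);
        return w;
    }

    int main()
    {
        auto f = wrapWithNull(std::make_shared<WindowFunnel>());
        std::cout << f->name() << '\n'; // Null(...)
    }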

Some files were not shown because too many files have changed in this diff.