Merge remote-tracking branch 'upstream/master' into fix-projection-merge

This commit is contained in:
jsc0218 2024-08-26 13:06:58 +00:00
commit b381c9dd84
184 changed files with 1661 additions and 676 deletions


@@ -130,6 +130,7 @@ jobs:
       with:
         build_name: package_debug
         data: ${{ needs.RunConfig.outputs.data }}
+        force: true
   BuilderBinDarwin:
     needs: [RunConfig, BuildDockers]
     if: ${{ !failure() && !cancelled() }}


@@ -34,7 +34,7 @@ curl https://clickhouse.com/ | sh
 Every month we get together with the community (users, contributors, customers, those interested in learning more about ClickHouse) to discuss what is coming in the latest release. If you are interested in sharing what you've built on ClickHouse, let us know.
-* [v24.8 Community Call](https://clickhouse.com/company/events/v24-8-community-release-call) - August 20
+* [v24.9 Community Call](https://clickhouse.com/company/events/v24-9-community-release-call) - September 26
 ## Upcoming Events
@@ -44,13 +44,22 @@ The following upcoming meetups are featuring creator of ClickHouse & CTO, Alexey
 * [ClickHouse Guangzhou User Group Meetup](https://mp.weixin.qq.com/s/GSvo-7xUoVzCsuUvlLTpCw) - August 25
 * [San Francisco Meetup (Cloudflare)](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/302540575) - September 5
-* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/clickhouse-nc-meetup-group/events/302557230) - September 9
+* [Raleigh Meetup (Deutsche Bank)](https://www.meetup.com/triangletechtalks/events/302723486/) - September 9
-* [New York Meetup (Ramp)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
+* [New York Meetup (Rokt)](https://www.meetup.com/clickhouse-new-york-user-group/events/302575342) - September 10
 * [Chicago Meetup (Jump Capital)](https://lu.ma/43tvmrfw) - September 12
+Other upcoming meetups
+* [Seattle Meetup (Statsig)](https://www.meetup.com/clickhouse-seattle-user-group/events/302518075/) - August 27
+* [Melbourne Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302732666/) - August 27
+* [Sydney Meetup](https://www.meetup.com/clickhouse-australia-user-group/events/302862966/) - September 5
+* [Zurich Meetup](https://www.meetup.com/clickhouse-switzerland-meetup-group/events/302267429/) - September 5
+* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
+* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
+* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
 ## Recent Recordings
 * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Current featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments"
-* **Recording available**: [**v24.4 Release Call**](https://www.youtube.com/watch?v=dtUqgcfOGmE) All the features of 24.4, one convenient video! Watch it now!
+* **Recording available**: [**v24.8 LTS Release Call**](https://www.youtube.com/watch?v=AeLmp2jc51k) All the features of 24.8 LTS, one convenient video! Watch it now!
 ## Interested in joining ClickHouse and making it your full-time job?


@@ -311,6 +311,14 @@ int SecureSocketImpl::sendBytes(const void* buffer, int length, int flags)
     while (mustRetry(rc, remaining_time));
     if (rc <= 0)
     {
+        // At this stage there may still be one last, not yet received SSL message containing an SSL error,
+        // so issue a read to force SSL to process the possible SSL error.
+        if (SSL_get_error(_pSSL, rc) == SSL_ERROR_SYSCALL && SocketImpl::lastError() == POCO_ECONNRESET)
+        {
+            char c = 0;
+            SSL_read(_pSSL, &c, 1);
+        }
         rc = handleError(rc);
         if (rc == 0) throw SSLConnectionUnexpectedlyClosedException();
     }
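As a side note, here is a minimal standalone sketch of the pattern this hunk relies on, written against plain OpenSSL calls rather than Poco's wrappers (the helper name and the use of `errno` in place of `SocketImpl::lastError()` are illustrative assumptions):

```cpp
#include <openssl/ssl.h>
#include <openssl/err.h>
#include <cerrno>

// After a failed SSL_write() that maps to SSL_ERROR_SYSCALL with ECONNRESET, the peer's
// fatal alert may still be sitting unread in the socket buffer. A one-byte SSL_read()
// lets OpenSSL parse that pending record, so the error queue can report the real TLS
// error instead of a bare connection reset.
static unsigned long tlsErrorAfterReset(SSL * ssl, int write_rc)
{
    if (SSL_get_error(ssl, write_rc) == SSL_ERROR_SYSCALL && errno == ECONNRESET)
    {
        char c = 0;
        SSL_read(ssl, &c, 1); // forces processing of the buffered alert record, if any
    }
    return ERR_get_error(); // now reflects the alert rather than a plain syscall error
}
```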


@@ -8,4 +8,7 @@ set (CMAKE_CXX_COMPILER_TARGET "x86_64-pc-freebsd11")
 set (CMAKE_ASM_COMPILER_TARGET "x86_64-pc-freebsd11")
 set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../../contrib/sysroot/freebsd-x86_64")
+# dprintf is used in a patched version of replxx
+add_compile_definitions(_WITH_DPRINTF)
 set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake
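For context, a small sketch of what `_WITH_DPRINTF` unlocks: on FreeBSD, `<stdio.h>` only declares `dprintf()` when this macro (or a recent POSIX visibility level) is in effect, so code such as the patched replxx that writes formatted output straight to a file descriptor would otherwise fail to compile. The program below is an illustration only, not part of the toolchain file:

```cpp
#include <stdio.h>   // on FreeBSD, declares dprintf() only under _WITH_DPRINTF / POSIX 2008
#include <unistd.h>  // STDERR_FILENO

int main()
{
    // Write formatted text directly to a file descriptor, bypassing FILE* streams.
    dprintf(STDERR_FILENO, "terminal width: %d\n", 80);
    return 0;
}
```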

contrib/replxx (vendored)

@@ -1 +1 @@
-Subproject commit 5d04501f93a4fb7f0bb8b73b8f614bc986f9e25b
+Subproject commit 711c18e7f4d951255aa8b0851e5a55d5a5fb0ddb


@@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="24.8.1.2684"
+ARG VERSION="24.8.2.3"
 ARG PACKAGES="clickhouse-keeper"
 ARG DIRECT_DOWNLOAD_URLS=""


@@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="24.8.1.2684"
+ARG VERSION="24.8.2.3"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 ARG DIRECT_DOWNLOAD_URLS=""


@@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="24.8.1.2684"
+ARG VERSION="24.8.2.3"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
 #docker-official-library:off


@@ -40,6 +40,3 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git"
 RUN git clone --recursive ${sqllogic_test_repo}
-COPY run.sh /
-CMD ["/bin/bash", "/run.sh"]


@@ -0,0 +1,71 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.5.5.41-stable (441d4a6ebe3) FIXME as compared to v24.5.4.49-stable (63b760955a0)
#### Improvement
* Backported in [#66768](https://github.com/ClickHouse/ClickHouse/issues/66768): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#65350](https://github.com/ClickHouse/ClickHouse/issues/65350): Fix possible abort on uncaught exception in ~WriteBufferFromFileDescriptor in StatusFile. [#64206](https://github.com/ClickHouse/ClickHouse/pull/64206) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#65621](https://github.com/ClickHouse/ClickHouse/issues/65621): Fix `Cannot find column` in distributed query with `ARRAY JOIN` by `Nested` column. Fixes [#64755](https://github.com/ClickHouse/ClickHouse/issues/64755). [#64801](https://github.com/ClickHouse/ClickHouse/pull/64801) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#67902](https://github.com/ClickHouse/ClickHouse/issues/67902): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66884](https://github.com/ClickHouse/ClickHouse/issues/66884): Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#65933](https://github.com/ClickHouse/ClickHouse/issues/65933): For queries that read from `PostgreSQL`, cancel the internal `PostgreSQL` query if the ClickHouse query is finished. Otherwise, `ClickHouse` query cannot be canceled until the internal `PostgreSQL` query is finished. [#65771](https://github.com/ClickHouse/ClickHouse/pull/65771) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#66301](https://github.com/ClickHouse/ClickHouse/issues/66301): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), and fixes an incorrect optimization when conditions other than `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)).
* Backported in [#66328](https://github.com/ClickHouse/ClickHouse/issues/66328): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. This prevents incorrect schema inference results when these settings are changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68252](https://github.com/ClickHouse/ClickHouse/issues/68252): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)).
* Backported in [#66155](https://github.com/ClickHouse/ClickHouse/issues/66155): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#66454](https://github.com/ClickHouse/ClickHouse/issues/66454): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#66226](https://github.com/ClickHouse/ClickHouse/issues/66226): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66680](https://github.com/ClickHouse/ClickHouse/issues/66680): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#66604](https://github.com/ClickHouse/ClickHouse/issues/66604): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2 (see the sketch after this list). [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)).
* Backported in [#66360](https://github.com/ClickHouse/ClickHouse/issues/66360): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#68064](https://github.com/ClickHouse/ClickHouse/issues/68064): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
* Backported in [#68158](https://github.com/ClickHouse/ClickHouse/issues/68158): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#66972](https://github.com/ClickHouse/ClickHouse/issues/66972): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66691](https://github.com/ClickHouse/ClickHouse/issues/66691): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#66969](https://github.com/ClickHouse/ClickHouse/issues/66969): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66720](https://github.com/ClickHouse/ClickHouse/issues/66720): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#66951](https://github.com/ClickHouse/ClickHouse/issues/66951): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66757](https://github.com/ClickHouse/ClickHouse/issues/66757): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66948](https://github.com/ClickHouse/ClickHouse/issues/66948): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#68115](https://github.com/ClickHouse/ClickHouse/issues/68115): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#67633](https://github.com/ClickHouse/ClickHouse/issues/67633): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
* Backported in [#67481](https://github.com/ClickHouse/ClickHouse/issues/67481): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)).
* Backported in [#67814](https://github.com/ClickHouse/ClickHouse/issues/67814): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#67197](https://github.com/ClickHouse/ClickHouse/issues/67197): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#67379](https://github.com/ClickHouse/ClickHouse/issues/67379): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#67501](https://github.com/ClickHouse/ClickHouse/issues/67501): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).
* Backported in [#67886](https://github.com/ClickHouse/ClickHouse/issues/67886): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#67576](https://github.com/ClickHouse/ClickHouse/issues/67576): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#67850](https://github.com/ClickHouse/ClickHouse/issues/67850): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#68272](https://github.com/ClickHouse/ClickHouse/issues/68272): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#67807](https://github.com/ClickHouse/ClickHouse/issues/67807): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#67836](https://github.com/ClickHouse/ClickHouse/issues/67836): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#67991](https://github.com/ClickHouse/ClickHouse/issues/67991): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68207](https://github.com/ClickHouse/ClickHouse/issues/68207): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#68091](https://github.com/ClickHouse/ClickHouse/issues/68091): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#68122](https://github.com/ClickHouse/ClickHouse/issues/68122): Fixed skipping of untouched parts in mutations with the new analyzer. Previously, with the analyzer enabled, data in a part could be rewritten by a mutation even if the mutation did not affect that part according to the predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68171](https://github.com/ClickHouse/ClickHouse/issues/68171): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)).
* Backported in [#68337](https://github.com/ClickHouse/ClickHouse/issues/68337): Try to fix a PostgreSQL crash when a query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#68667](https://github.com/ClickHouse/ClickHouse/issues/68667): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).
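The cgroups v2 entry above ([#66237]) is the one referenced as "see the sketch after this list". Below is a minimal, hypothetical illustration (not ClickHouse's actual detection code) of reading the cgroup v2 `cpu.max` limit, which applies even when the process runs in the root cgroup with an empty relative path:

```cpp
#include <fstream>
#include <iostream>
#include <string>

int main()
{
    // cpu.max holds "<quota> <period>" or "max <period>"; in many containers the process
    // sits in the root cgroup, so the file lives directly under /sys/fs/cgroup.
    std::ifstream limits("/sys/fs/cgroup/cpu.max");
    std::string quota;
    long period = 0;
    if (limits >> quota >> period && quota != "max" && period > 0)
        std::cout << "usable cores ~ " << std::stol(quota) / static_cast<double>(period) << '\n';
    else
        std::cout << "no cgroup v2 CPU limit detected\n";
    return 0;
}
```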
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#66387](https://github.com/ClickHouse/ClickHouse/issues/66387): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)).
* Backported in [#66426](https://github.com/ClickHouse/ClickHouse/issues/66426): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66544](https://github.com/ClickHouse/ClickHouse/issues/66544): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66859](https://github.com/ClickHouse/ClickHouse/issues/66859): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)).
* Backported in [#66875](https://github.com/ClickHouse/ClickHouse/issues/66875): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)).
* Backported in [#67059](https://github.com/ClickHouse/ClickHouse/issues/67059): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)).
* Backported in [#66945](https://github.com/ClickHouse/ClickHouse/issues/66945): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#67252](https://github.com/ClickHouse/ClickHouse/issues/67252): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)).
* Backported in [#67412](https://github.com/ClickHouse/ClickHouse/issues/67412): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)).
* Update version after release. [#67862](https://github.com/ClickHouse/ClickHouse/pull/67862) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Backported in [#68077](https://github.com/ClickHouse/ClickHouse/issues/68077): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).


@@ -0,0 +1,33 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.5.6.45-stable (bdca8604c29) FIXME as compared to v24.5.5.78-stable (0138248cb62)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#67902](https://github.com/ClickHouse/ClickHouse/issues/67902): Fixing the `Not-ready Set` error after the `PREWHERE` optimization for StorageMerge. [#65057](https://github.com/ClickHouse/ClickHouse/pull/65057) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#68252](https://github.com/ClickHouse/ClickHouse/issues/68252): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)).
* Backported in [#68064](https://github.com/ClickHouse/ClickHouse/issues/68064): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
* Backported in [#68158](https://github.com/ClickHouse/ClickHouse/issues/68158): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#68115](https://github.com/ClickHouse/ClickHouse/issues/68115): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#67886](https://github.com/ClickHouse/ClickHouse/issues/67886): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#68272](https://github.com/ClickHouse/ClickHouse/issues/68272): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#67807](https://github.com/ClickHouse/ClickHouse/issues/67807): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#67836](https://github.com/ClickHouse/ClickHouse/issues/67836): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#67991](https://github.com/ClickHouse/ClickHouse/issues/67991): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68207](https://github.com/ClickHouse/ClickHouse/issues/68207): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#68091](https://github.com/ClickHouse/ClickHouse/issues/68091): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#68122](https://github.com/ClickHouse/ClickHouse/issues/68122): Fixed skipping of untouched parts in mutations with the new analyzer. Previously, with the analyzer enabled, data in a part could be rewritten by a mutation even if the mutation did not affect that part according to the predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68171](https://github.com/ClickHouse/ClickHouse/issues/68171): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)).
* Backported in [#68337](https://github.com/ClickHouse/ClickHouse/issues/68337): Try to fix a PostgreSQL crash when a query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#68667](https://github.com/ClickHouse/ClickHouse/issues/68667): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Update version after release. [#67862](https://github.com/ClickHouse/ClickHouse/pull/67862) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Backported in [#68077](https://github.com/ClickHouse/ClickHouse/issues/68077): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).
* Backported in [#68756](https://github.com/ClickHouse/ClickHouse/issues/68756): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)).


@@ -0,0 +1,83 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.6.3.38-stable (4e33c831589) FIXME as compared to v24.6.2.17-stable (5710a8b5c0c)
#### Improvement
* Backported in [#66770](https://github.com/ClickHouse/ClickHouse/issues/66770): Make allow_experimental_analyzer be controlled by the initiator for distributed queries. This ensures compatibility and correctness during operations in mixed version clusters. [#65777](https://github.com/ClickHouse/ClickHouse/pull/65777) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#66885](https://github.com/ClickHouse/ClickHouse/issues/66885): Fix unexpected size of low cardinality column in function calls. [#65298](https://github.com/ClickHouse/ClickHouse/pull/65298) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66303](https://github.com/ClickHouse/ClickHouse/issues/66303): Better handling of join conditions involving `IS NULL` checks (for example `ON (a = b AND (a IS NOT NULL) AND (b IS NOT NULL) ) OR ( (a IS NULL) AND (b IS NULL) )` is rewritten to `ON a <=> b`), and fixes an incorrect optimization when conditions other than `IS NULL` are present. [#65835](https://github.com/ClickHouse/ClickHouse/pull/65835) ([vdimir](https://github.com/vdimir)).
* Backported in [#66330](https://github.com/ClickHouse/ClickHouse/issues/66330): Add missing settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines/input_format_csv_try_infer_numbers_from_strings/input_format_csv_try_infer_strings_from_quoted_tuples` in schema inference cache because they can change the resulting schema. This prevents incorrect schema inference results when these settings are changed. [#65980](https://github.com/ClickHouse/ClickHouse/pull/65980) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#66157](https://github.com/ClickHouse/ClickHouse/issues/66157): Fixed buffer overflow bug in `unbin`/`unhex` implementation. [#66106](https://github.com/ClickHouse/ClickHouse/pull/66106) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#66210](https://github.com/ClickHouse/ClickHouse/issues/66210): Disable the `merge-filters` optimization introduced in [#64760](https://github.com/ClickHouse/ClickHouse/issues/64760). It may cause an exception if optimization merges two filter expressions and does not apply a short-circuit evaluation. [#66126](https://github.com/ClickHouse/ClickHouse/pull/66126) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66456](https://github.com/ClickHouse/ClickHouse/issues/66456): Fixed a bug in ZooKeeper client: a session could get stuck in unusable state after receiving a hardware error from ZooKeeper. For example, this might happen due to "soft memory limit" in ClickHouse Keeper. [#66140](https://github.com/ClickHouse/ClickHouse/pull/66140) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#66228](https://github.com/ClickHouse/ClickHouse/issues/66228): Fix issue in SumIfToCountIfVisitor and signed integers. [#66146](https://github.com/ClickHouse/ClickHouse/pull/66146) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66183](https://github.com/ClickHouse/ClickHouse/issues/66183): Fix rare case with missing data in the result of distributed query, close [#61432](https://github.com/ClickHouse/ClickHouse/issues/61432). [#66174](https://github.com/ClickHouse/ClickHouse/pull/66174) ([vdimir](https://github.com/vdimir)).
* Backported in [#66271](https://github.com/ClickHouse/ClickHouse/issues/66271): Don't throw `TIMEOUT_EXCEEDED` for `none_only_active` mode of `distributed_ddl_output_mode`. [#66218](https://github.com/ClickHouse/ClickHouse/pull/66218) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#66682](https://github.com/ClickHouse/ClickHouse/issues/66682): Fix handling limit for `system.numbers_mt` when no index can be used. [#66231](https://github.com/ClickHouse/ClickHouse/pull/66231) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#66587](https://github.com/ClickHouse/ClickHouse/issues/66587): Fixed how the ClickHouse server detects the maximum number of usable CPU cores as specified by cgroups v2 if the server runs in a container such as Docker. In more detail, containers often run their process in the root cgroup which has an empty name. In that case, ClickHouse ignored the CPU limits set by cgroups v2. [#66237](https://github.com/ClickHouse/ClickHouse/pull/66237) ([filimonov](https://github.com/filimonov)).
* Backported in [#66362](https://github.com/ClickHouse/ClickHouse/issues/66362): Fix the `Not-ready set` error when a subquery with `IN` is used in the constraint. [#66261](https://github.com/ClickHouse/ClickHouse/pull/66261) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#66613](https://github.com/ClickHouse/ClickHouse/issues/66613): Fix `Column identifier is already registered` error with `group_by_use_nulls=true` and new analyzer. [#66400](https://github.com/ClickHouse/ClickHouse/pull/66400) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66693](https://github.com/ClickHouse/ClickHouse/issues/66693): Fix the VALID UNTIL clause in the user definition resetting after a restart. Closes [#66405](https://github.com/ClickHouse/ClickHouse/issues/66405). [#66409](https://github.com/ClickHouse/ClickHouse/pull/66409) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#66577](https://github.com/ClickHouse/ClickHouse/issues/66577): Fix `Cannot find column` error for queries with constant expression in `GROUP BY` key and new analyzer enabled. [#66433](https://github.com/ClickHouse/ClickHouse/pull/66433) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66721](https://github.com/ClickHouse/ClickHouse/issues/66721): Correctly track memory for `Allocator::realloc`. [#66548](https://github.com/ClickHouse/ClickHouse/pull/66548) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#66670](https://github.com/ClickHouse/ClickHouse/issues/66670): Fix reading of uninitialized memory when hashing empty tuples. This closes [#66559](https://github.com/ClickHouse/ClickHouse/issues/66559). [#66562](https://github.com/ClickHouse/ClickHouse/pull/66562) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#66952](https://github.com/ClickHouse/ClickHouse/issues/66952): Fix an invalid result for queries with `WINDOW`. This could happen when `PARTITION` columns have sparse serialization and window functions are executed in parallel. [#66579](https://github.com/ClickHouse/ClickHouse/pull/66579) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66956](https://github.com/ClickHouse/ClickHouse/issues/66956): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#66716](https://github.com/ClickHouse/ClickHouse/issues/66716): Fix removing named collections in local storage. [#66599](https://github.com/ClickHouse/ClickHouse/pull/66599) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#66759](https://github.com/ClickHouse/ClickHouse/issues/66759): Fix `Unknown identifier` and `Column is not under aggregate function` errors for queries with the expression `(column IS NULL).` The bug was triggered by [#65088](https://github.com/ClickHouse/ClickHouse/issues/65088), with the disabled analyzer only. [#66654](https://github.com/ClickHouse/ClickHouse/pull/66654) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66751](https://github.com/ClickHouse/ClickHouse/issues/66751): Fix `Method getResultType is not supported for QUERY query node` error when scalar subquery was used as the first argument of IN (with new analyzer). [#66655](https://github.com/ClickHouse/ClickHouse/pull/66655) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#67635](https://github.com/ClickHouse/ClickHouse/issues/67635): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
* Backported in [#67482](https://github.com/ClickHouse/ClickHouse/issues/67482): In rare cases ClickHouse could consider parts as broken because of some unexpected projections on disk. Now it's fixed. [#66898](https://github.com/ClickHouse/ClickHouse/pull/66898) ([alesapin](https://github.com/alesapin)).
* Backported in [#67816](https://github.com/ClickHouse/ClickHouse/issues/67816): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#67199](https://github.com/ClickHouse/ClickHouse/issues/67199): TRUNCATE DATABASE used to stop replication as if it was a DROP DATABASE query, it's fixed. [#67129](https://github.com/ClickHouse/ClickHouse/pull/67129) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Backported in [#67381](https://github.com/ClickHouse/ClickHouse/issues/67381): Fix error `Cannot convert column because it is non constant in source stream but must be constant in result.` for a query that reads from the `Merge` table over the `Distributed` table with one shard. [#67146](https://github.com/ClickHouse/ClickHouse/pull/67146) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#67244](https://github.com/ClickHouse/ClickHouse/issues/67244): This closes [#67156](https://github.com/ClickHouse/ClickHouse/issues/67156). This closes [#66447](https://github.com/ClickHouse/ClickHouse/issues/66447). The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/62907. [#67178](https://github.com/ClickHouse/ClickHouse/pull/67178) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#67503](https://github.com/ClickHouse/ClickHouse/issues/67503): Fix crash in DistributedAsyncInsert when connection is empty. [#67219](https://github.com/ClickHouse/ClickHouse/pull/67219) ([Pablo Marcos](https://github.com/pamarcos)).
* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#67578](https://github.com/ClickHouse/ClickHouse/issues/67578): Fix execution of nested short-circuit functions. [#67520](https://github.com/ClickHouse/ClickHouse/pull/67520) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68611](https://github.com/ClickHouse/ClickHouse/issues/68611): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#67852](https://github.com/ClickHouse/ClickHouse/issues/67852): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#68275](https://github.com/ClickHouse/ClickHouse/issues/68275): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#67808](https://github.com/ClickHouse/ClickHouse/issues/67808): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#67838](https://github.com/ClickHouse/ClickHouse/issues/67838): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#67993](https://github.com/ClickHouse/ClickHouse/issues/67993): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68208](https://github.com/ClickHouse/ClickHouse/issues/68208): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#68093](https://github.com/ClickHouse/ClickHouse/issues/68093): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#68124](https://github.com/ClickHouse/ClickHouse/issues/68124): Fixed skipping of untouched parts in mutations with the new analyzer. Previously, with the analyzer enabled, data in a part could be rewritten by a mutation even if the mutation did not affect that part according to the predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68221](https://github.com/ClickHouse/ClickHouse/issues/68221): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Backported in [#68173](https://github.com/ClickHouse/ClickHouse/issues/68173): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)).
* Backported in [#68339](https://github.com/ClickHouse/ClickHouse/issues/68339): Try to fix a PostgreSQL crash when a query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#68396](https://github.com/ClickHouse/ClickHouse/issues/68396): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#68668](https://github.com/ClickHouse/ClickHouse/issues/68668): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Backport [#66599](https://github.com/ClickHouse/ClickHouse/issues/66599) to 24.6: Fix dropping named collection in local storage"'. [#66922](https://github.com/ClickHouse/ClickHouse/pull/66922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#66332](https://github.com/ClickHouse/ClickHouse/issues/66332): Do not raise a NOT_IMPLEMENTED error when getting s3 metrics with a multiple disk configuration. [#65403](https://github.com/ClickHouse/ClickHouse/pull/65403) ([Elena Torró](https://github.com/elenatorro)).
* Backported in [#66142](https://github.com/ClickHouse/ClickHouse/issues/66142): Fix flaky test_storage_s3_queue tests. [#66009](https://github.com/ClickHouse/ClickHouse/pull/66009) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#66389](https://github.com/ClickHouse/ClickHouse/issues/66389): Disable broken cases from 02911_join_on_nullsafe_optimization. [#66310](https://github.com/ClickHouse/ClickHouse/pull/66310) ([vdimir](https://github.com/vdimir)).
* Backported in [#66428](https://github.com/ClickHouse/ClickHouse/issues/66428): Ignore subquery for IN in DDLLoadingDependencyVisitor. [#66395](https://github.com/ClickHouse/ClickHouse/pull/66395) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#66546](https://github.com/ClickHouse/ClickHouse/issues/66546): Add additional log masking in CI. [#66523](https://github.com/ClickHouse/ClickHouse/pull/66523) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#66861](https://github.com/ClickHouse/ClickHouse/issues/66861): Fix data race in S3::ClientCache. [#66644](https://github.com/ClickHouse/ClickHouse/pull/66644) ([Konstantin Morozov](https://github.com/k-morozov)).
* Backported in [#66877](https://github.com/ClickHouse/ClickHouse/issues/66877): Support one more case in JOIN ON ... IS NULL. [#66725](https://github.com/ClickHouse/ClickHouse/pull/66725) ([vdimir](https://github.com/vdimir)).
* Backported in [#67061](https://github.com/ClickHouse/ClickHouse/issues/67061): Increase asio pool size in case the server is tiny. [#66761](https://github.com/ClickHouse/ClickHouse/pull/66761) ([alesapin](https://github.com/alesapin)).
* Backported in [#66940](https://github.com/ClickHouse/ClickHouse/issues/66940): Small fix in realloc memory tracking. [#66820](https://github.com/ClickHouse/ClickHouse/pull/66820) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#67254](https://github.com/ClickHouse/ClickHouse/issues/67254): Followup [#66725](https://github.com/ClickHouse/ClickHouse/issues/66725). [#66869](https://github.com/ClickHouse/ClickHouse/pull/66869) ([vdimir](https://github.com/vdimir)).
* Backported in [#67414](https://github.com/ClickHouse/ClickHouse/issues/67414): CI: Fix build results for release branches. [#67402](https://github.com/ClickHouse/ClickHouse/pull/67402) ([Max K.](https://github.com/maxknv)).
* Update version after release. [#67909](https://github.com/ClickHouse/ClickHouse/pull/67909) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Backported in [#68079](https://github.com/ClickHouse/ClickHouse/issues/68079): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).


@@ -0,0 +1,33 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.6.4.42-stable (c534bb4b4dd) FIXME as compared to v24.6.3.95-stable (8325c920d11)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#68066](https://github.com/ClickHouse/ClickHouse/issues/68066): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
* Backported in [#68566](https://github.com/ClickHouse/ClickHouse/issues/68566): Fix indexHint function case found by fuzzer. [#66286](https://github.com/ClickHouse/ClickHouse/pull/66286) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68159](https://github.com/ClickHouse/ClickHouse/issues/68159): Fix cluster() for inter-server secret (preserve initial user as before). [#66364](https://github.com/ClickHouse/ClickHouse/pull/66364) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#68116](https://github.com/ClickHouse/ClickHouse/issues/68116): Fix possible PARAMETER_OUT_OF_BOUND error during reading variant subcolumn. [#66659](https://github.com/ClickHouse/ClickHouse/pull/66659) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#67887](https://github.com/ClickHouse/ClickHouse/issues/67887): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#68611](https://github.com/ClickHouse/ClickHouse/issues/68611): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#68275](https://github.com/ClickHouse/ClickHouse/issues/68275): Fix inserting into stream like engines (Kafka, RabbitMQ, NATS) through HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#67993](https://github.com/ClickHouse/ClickHouse/issues/67993): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68208](https://github.com/ClickHouse/ClickHouse/issues/68208): Fix wrong `count()` result when there is non-deterministic function in predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#68093](https://github.com/ClickHouse/ClickHouse/issues/68093): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#68124](https://github.com/ClickHouse/ClickHouse/issues/68124): Fixed skipping of untouched parts in mutations with the new analyzer. Previously, with the analyzer enabled, data in a part could be rewritten by a mutation even if the mutation did not affect that part according to the predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68221](https://github.com/ClickHouse/ClickHouse/issues/68221): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via hopEnd, hopStart, tumbleEnd, and tumbleStart. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Backported in [#68173](https://github.com/ClickHouse/ClickHouse/issues/68173): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)).
* Backported in [#68339](https://github.com/ClickHouse/ClickHouse/issues/68339): Try to fix a PostgreSQL crash when a query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#68396](https://github.com/ClickHouse/ClickHouse/issues/68396): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#68668](https://github.com/ClickHouse/ClickHouse/issues/68668): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Update version after release. [#67909](https://github.com/ClickHouse/ClickHouse/pull/67909) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Backported in [#68079](https://github.com/ClickHouse/ClickHouse/issues/68079): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).
* Backported in [#68758](https://github.com/ClickHouse/ClickHouse/issues/68758): To make patch release possible from every commit on release branch, package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)).

View File

@ -0,0 +1,55 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.7.3.47-stable (2e50fe27a14) FIXME as compared to v24.7.2.13-stable (6e41f601b2f)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#68232](https://github.com/ClickHouse/ClickHouse/issues/68232): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)).
* Backported in [#67969](https://github.com/ClickHouse/ClickHouse/issues/67969): Fixed reading of subcolumns after `ALTER ADD COLUMN` query. [#66243](https://github.com/ClickHouse/ClickHouse/pull/66243) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68068](https://github.com/ClickHouse/ClickHouse/issues/68068): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
* Backported in [#67637](https://github.com/ClickHouse/ClickHouse/issues/67637): Fix for occasional deadlock in Context::getDDLWorker. [#66843](https://github.com/ClickHouse/ClickHouse/pull/66843) ([Alexander Gololobov](https://github.com/davenger)).
* Backported in [#67820](https://github.com/ClickHouse/ClickHouse/issues/67820): Fix possible deadlock on query cancel with parallel replicas. [#66905](https://github.com/ClickHouse/ClickHouse/pull/66905) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#67818](https://github.com/ClickHouse/ClickHouse/issues/67818): Only relevant to the experimental Variant data type. Fix crash with Variant + AggregateFunction type. [#67122](https://github.com/ClickHouse/ClickHouse/pull/67122) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#67766](https://github.com/ClickHouse/ClickHouse/issues/67766): Fix crash of `uniq` and `uniqTheta` with `tuple()` argument. Closes [#67303](https://github.com/ClickHouse/ClickHouse/issues/67303). [#67306](https://github.com/ClickHouse/ClickHouse/pull/67306) ([flynn](https://github.com/ucasfl)).
* Backported in [#67881](https://github.com/ClickHouse/ClickHouse/issues/67881): Correctly parse file name/URI containing `::` if it's not an archive. [#67433](https://github.com/ClickHouse/ClickHouse/pull/67433) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#68613](https://github.com/ClickHouse/ClickHouse/issues/68613): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#67854](https://github.com/ClickHouse/ClickHouse/issues/67854): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#68278](https://github.com/ClickHouse/ClickHouse/issues/68278): Fix inserting into stream-like engines (Kafka, RabbitMQ, NATS) through the HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#68040](https://github.com/ClickHouse/ClickHouse/issues/68040): Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#68038](https://github.com/ClickHouse/ClickHouse/issues/68038): Fix crash on `percent_rank`. The default frame type of `percent_rank` is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is now taken into account, so window functions without a window frame definition in SQL are assigned to the proper `WindowTransformer`s. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)).
* Backported in [#67713](https://github.com/ClickHouse/ClickHouse/issues/67713): Fix reloading SQL UDFs with UNION. Previously, restarting the server could make the UDF invalid. [#67665](https://github.com/ClickHouse/ClickHouse/pull/67665) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#67840](https://github.com/ClickHouse/ClickHouse/issues/67840): Fix potential stack overflow in `JSONMergePatch` function. Renamed this function from `jsonMergePatch` to `JSONMergePatch` because the previous name was wrong. The previous name is still kept for compatibility. Improved diagnostic of errors in the function. This closes [#67304](https://github.com/ClickHouse/ClickHouse/issues/67304). [#67756](https://github.com/ClickHouse/ClickHouse/pull/67756) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#67995](https://github.com/ClickHouse/ClickHouse/issues/67995): Validate experimental/suspicious data types in ALTER ADD/MODIFY COLUMN. [#67911](https://github.com/ClickHouse/ClickHouse/pull/67911) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68224](https://github.com/ClickHouse/ClickHouse/issues/68224): Fix wrong `count()` result when there is a non-deterministic function in the predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#68095](https://github.com/ClickHouse/ClickHouse/issues/68095): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#68126](https://github.com/ClickHouse/ClickHouse/issues/68126): Fixed skipping of untouched parts in mutations with the new analyzer. Previously, with the analyzer enabled, data in a part could be rewritten by a mutation even if the mutation did not affect that part according to the predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68223](https://github.com/ClickHouse/ClickHouse/issues/68223): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via `hopEnd`, `hopStart`, `tumbleEnd`, and `tumbleStart`. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Backported in [#68175](https://github.com/ClickHouse/ClickHouse/issues/68175): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)).
* Backported in [#68341](https://github.com/ClickHouse/ClickHouse/issues/68341): Attempt to fix a PostgreSQL crash when a query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#68398](https://github.com/ClickHouse/ClickHouse/issues/68398): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#68669](https://github.com/ClickHouse/ClickHouse/issues/68669): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#67518](https://github.com/ClickHouse/ClickHouse/issues/67518): Split slow test 03036_dynamic_read_subcolumns. [#66954](https://github.com/ClickHouse/ClickHouse/pull/66954) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#67516](https://github.com/ClickHouse/ClickHouse/issues/67516): Split 01508_partition_pruning_long. [#66983](https://github.com/ClickHouse/ClickHouse/pull/66983) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#67529](https://github.com/ClickHouse/ClickHouse/issues/67529): Reduce max time of 00763_long_lock_buffer_alter_destination_table. [#67185](https://github.com/ClickHouse/ClickHouse/pull/67185) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#67803](https://github.com/ClickHouse/ClickHouse/issues/67803): Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#67643](https://github.com/ClickHouse/ClickHouse/issues/67643): [Green CI] Fix potentially flaky test_mask_sensitive_info integration test. [#67506](https://github.com/ClickHouse/ClickHouse/pull/67506) ([Alexey Katsman](https://github.com/alexkats)).
* Backported in [#67609](https://github.com/ClickHouse/ClickHouse/issues/67609): Fix test_zookeeper_config_load_balancing after adding the xdist worker name to the instance. [#67590](https://github.com/ClickHouse/ClickHouse/pull/67590) ([Pablo Marcos](https://github.com/pamarcos)).
* Backported in [#67871](https://github.com/ClickHouse/ClickHouse/issues/67871): Fix 02434_cancel_insert_when_client_dies. [#67600](https://github.com/ClickHouse/ClickHouse/pull/67600) ([vdimir](https://github.com/vdimir)).
* Backported in [#67704](https://github.com/ClickHouse/ClickHouse/issues/67704): Fix 02910_bad_logs_level_in_local in fast tests. [#67603](https://github.com/ClickHouse/ClickHouse/pull/67603) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#67689](https://github.com/ClickHouse/ClickHouse/issues/67689): Fix 01605_adaptive_granularity_block_borders. [#67605](https://github.com/ClickHouse/ClickHouse/pull/67605) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#67827](https://github.com/ClickHouse/ClickHouse/issues/67827): Try fix 03143_asof_join_ddb_long. [#67620](https://github.com/ClickHouse/ClickHouse/pull/67620) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#67892](https://github.com/ClickHouse/ClickHouse/issues/67892): Revert "Merge pull request [#66510](https://github.com/ClickHouse/ClickHouse/issues/66510) from canhld94/fix_trivial_count_non_deterministic_func". [#67800](https://github.com/ClickHouse/ClickHouse/pull/67800) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#68081](https://github.com/ClickHouse/ClickHouse/issues/68081): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).
* Update version after release. [#68044](https://github.com/ClickHouse/ClickHouse/pull/68044) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Backported in [#68269](https://github.com/ClickHouse/ClickHouse/issues/68269): [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)).
* Backported in [#68432](https://github.com/ClickHouse/ClickHouse/issues/68432): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#68538](https://github.com/ClickHouse/ClickHouse/issues/68538): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)).
* Backported in [#68555](https://github.com/ClickHouse/ClickHouse/issues/68555): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)).

View File

@ -0,0 +1,36 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.7.4.51-stable (70fe2f6fa52) FIXME as compared to v24.7.3.42-stable (63730bc4293)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#68232](https://github.com/ClickHouse/ClickHouse/issues/68232): Fixed `Not-ready Set` in some system tables when filtering using subqueries. [#66018](https://github.com/ClickHouse/ClickHouse/pull/66018) ([Michael Kolupaev](https://github.com/al13n321)).
* Backported in [#68068](https://github.com/ClickHouse/ClickHouse/issues/68068): Fix boolean literals in query sent to external database (for engines like `PostgreSQL`). [#66282](https://github.com/ClickHouse/ClickHouse/pull/66282) ([vdimir](https://github.com/vdimir)).
* Backported in [#68613](https://github.com/ClickHouse/ClickHouse/issues/68613): Fixes [#66026](https://github.com/ClickHouse/ClickHouse/issues/66026). Avoid unresolved table function arguments traversal in `ReplaceTableNodeToDummyVisitor`. [#67522](https://github.com/ClickHouse/ClickHouse/pull/67522) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#68278](https://github.com/ClickHouse/ClickHouse/issues/68278): Fix inserting into stream-like engines (Kafka, RabbitMQ, NATS) through the HTTP interface. [#67554](https://github.com/ClickHouse/ClickHouse/pull/67554) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#68040](https://github.com/ClickHouse/ClickHouse/issues/68040): Fix creation of view with recursive CTE. [#67587](https://github.com/ClickHouse/ClickHouse/pull/67587) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backported in [#68038](https://github.com/ClickHouse/ClickHouse/issues/68038): Fix crash on `percent_rank`. The default frame type of `percent_rank` is changed to `range unbounded preceding and unbounded following`. `IWindowFunction`'s default window frame is now taken into account, so window functions without a window frame definition in SQL are assigned to the proper `WindowTransformer`s. [#67661](https://github.com/ClickHouse/ClickHouse/pull/67661) ([lgbo](https://github.com/lgbo-ustc)).
* Backported in [#68224](https://github.com/ClickHouse/ClickHouse/issues/68224): Fix wrong `count()` result when there is a non-deterministic function in the predicate. [#67922](https://github.com/ClickHouse/ClickHouse/pull/67922) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Backported in [#68095](https://github.com/ClickHouse/ClickHouse/issues/68095): Fixed the calculation of the maximum thread soft limit in containerized environments where the usable CPU count is limited. [#67963](https://github.com/ClickHouse/ClickHouse/pull/67963) ([Robert Schulze](https://github.com/rschu1ze)).
* Backported in [#68126](https://github.com/ClickHouse/ClickHouse/issues/68126): Fixed skipping of untouched parts in mutations with the new analyzer. Previously, with the analyzer enabled, data in a part could be rewritten by a mutation even if the mutation did not affect that part according to the predicate. [#68052](https://github.com/ClickHouse/ClickHouse/pull/68052) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#68223](https://github.com/ClickHouse/ClickHouse/issues/68223): Fixed a NULL pointer dereference, triggered by a specially crafted query, that crashed the server via `hopEnd`, `hopStart`, `tumbleEnd`, and `tumbleStart`. [#68098](https://github.com/ClickHouse/ClickHouse/pull/68098) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Backported in [#68175](https://github.com/ClickHouse/ClickHouse/issues/68175): Removes an incorrect optimization to remove sorting in subqueries that use `OFFSET`. Fixes [#67906](https://github.com/ClickHouse/ClickHouse/issues/67906). [#68099](https://github.com/ClickHouse/ClickHouse/pull/68099) ([Graham Campbell](https://github.com/GrahamCampbell)).
* Backported in [#68341](https://github.com/ClickHouse/ClickHouse/issues/68341): Attempt to fix a PostgreSQL crash when a query is cancelled. [#68288](https://github.com/ClickHouse/ClickHouse/pull/68288) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#68398](https://github.com/ClickHouse/ClickHouse/issues/68398): Fix missing sync replica mode in query `SYSTEM SYNC REPLICA`. [#68326](https://github.com/ClickHouse/ClickHouse/pull/68326) ([Duc Canh Le](https://github.com/canhld94)).
* Backported in [#68669](https://github.com/ClickHouse/ClickHouse/issues/68669): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Backported in [#67803](https://github.com/ClickHouse/ClickHouse/issues/67803): Disable some Dynamic tests under sanitizers, rewrite 03202_dynamic_null_map_subcolumn to sql. [#67359](https://github.com/ClickHouse/ClickHouse/pull/67359) ([Kruglov Pavel](https://github.com/Avogar)).
* Backported in [#68081](https://github.com/ClickHouse/ClickHouse/issues/68081): Add an explicit error for `ALTER MODIFY SQL SECURITY` on non-view tables. [#67953](https://github.com/ClickHouse/ClickHouse/pull/67953) ([pufit](https://github.com/pufit)).
* Update version after release. [#68044](https://github.com/ClickHouse/ClickHouse/pull/68044) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Backported in [#68269](https://github.com/ClickHouse/ClickHouse/issues/68269): [Green CI] Fix test 01903_correct_block_size_prediction_with_default. [#68203](https://github.com/ClickHouse/ClickHouse/pull/68203) ([Pablo Marcos](https://github.com/pamarcos)).
* Backported in [#68432](https://github.com/ClickHouse/ClickHouse/issues/68432): tests: make 01600_parts_states_metrics_long better. [#68265](https://github.com/ClickHouse/ClickHouse/pull/68265) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#68538](https://github.com/ClickHouse/ClickHouse/issues/68538): CI: Native build for package_aarch64. [#68457](https://github.com/ClickHouse/ClickHouse/pull/68457) ([Max K.](https://github.com/maxknv)).
* Backported in [#68555](https://github.com/ClickHouse/ClickHouse/issues/68555): CI: Minor release workflow fix. [#68536](https://github.com/ClickHouse/ClickHouse/pull/68536) ([Max K.](https://github.com/maxknv)).
* Backported in [#68760](https://github.com/ClickHouse/ClickHouse/issues/68760): To make a patch release possible from every commit on a release branch, the package_debug build is required and must not be skipped. [#68750](https://github.com/ClickHouse/ClickHouse/pull/68750) ([Max K.](https://github.com/maxknv)).

View File

@ -0,0 +1,12 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.8.2.3-lts (b54f79ed323) FIXME as compared to v24.8.1.2684-lts (161c62fd295)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Backported in [#68670](https://github.com/ClickHouse/ClickHouse/issues/68670): Fix `LOGICAL_ERROR`s when functions `sipHash64Keyed`, `sipHash128Keyed`, or `sipHash128ReferenceKeyed` are applied to empty arrays or tuples. [#68630](https://github.com/ClickHouse/ClickHouse/pull/68630) ([Robert Schulze](https://github.com/rschu1ze)).

View File

@ -80,7 +80,7 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
`PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional. `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional.
Specifying a sorting key (using `ORDER BY` clause) implicitly specifies a primary key. Specifying a sorting key (using `ORDER BY` clause) implicitly specifies a primary key.
It is usually not necessary to specify the primary key in addition to the primary key. It is usually not necessary to specify the primary key in addition to the sorting key.
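For instance, a minimal sketch (table and column names are illustrative) where the sorting key doubles as the primary key:

```sql
CREATE TABLE visits
(
    `date` Date,
    `user_id` UInt64,
    `duration_ms` UInt32
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(date)
-- No PRIMARY KEY clause: the ORDER BY key is implicitly used as the primary key.
ORDER BY (date, user_id);
```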
#### SAMPLE BY #### SAMPLE BY

View File

@ -1389,7 +1389,7 @@ DESC format(JSONEachRow, '{"id" : 1, "age" : 25, "name" : "Josh", "status" : nul
#### schema_inference_make_columns_nullable #### schema_inference_make_columns_nullable
Controls making inferred types `Nullable` in schema inference for formats without information about nullability. Controls making inferred types `Nullable` in schema inference for formats without information about nullability.
If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will be `Nullable` only if `input_format_null_as_default` is disabled and the column contains `NULL` in a sample that is parsed during schema inference. If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will never be `Nullable`; if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or the file metadata contains information about column nullability.
Enabled by default. Enabled by default.
@ -1412,15 +1412,13 @@ DESC format(JSONEachRow, $$
└─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ └─────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
``` ```
```sql ```sql
SET schema_inference_make_columns_nullable = 0; SET schema_inference_make_columns_nullable = 'auto';
SET input_format_null_as_default = 0;
DESC format(JSONEachRow, $$ DESC format(JSONEachRow, $$
{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}
{"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]}
$$) $$)
``` ```
```response ```response
┌─name────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ ┌─name────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ id │ Int64 │ │ │ │ │ │ │ id │ Int64 │ │ │ │ │ │
│ age │ Int64 │ │ │ │ │ │ │ age │ Int64 │ │ │ │ │ │
@ -1432,7 +1430,6 @@ DESC format(JSONEachRow, $$
```sql ```sql
SET schema_inference_make_columns_nullable = 0; SET schema_inference_make_columns_nullable = 0;
SET input_format_null_as_default = 1;
DESC format(JSONEachRow, $$ DESC format(JSONEachRow, $$
{"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]} {"id" : 1, "age" : 25, "name" : "Josh", "status" : null, "hobbies" : ["football", "cooking"]}
{"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]} {"id" : 2, "age" : 19, "name" : "Alan", "status" : "married", "hobbies" : ["tennis", "art"]}

View File

@ -171,8 +171,8 @@ If the `schema_inference_hints` is not formated properly, or if there is a typo
## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable} ## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable}
Controls making inferred types `Nullable` in schema inference for formats without information about nullability. Controls making inferred types `Nullable` in schema inference.
If the setting is enabled, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference. If the setting is enabled, all inferred types will be `Nullable`; if disabled, the inferred type will never be `Nullable`; if set to `auto`, the inferred type will be `Nullable` only if the column contains `NULL` in a sample that is parsed during schema inference or the file metadata contains information about column nullability.
Default value: `true`. Default value: `true`.
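A small sketch of the difference between the forced and the `auto` modes (the inline JSON sample is illustrative):

```sql
SET schema_inference_make_columns_nullable = 1;
DESC format(JSONEachRow, '{"id" : 1, "age" : 25}');      -- columns are inferred as Nullable(Int64)

SET schema_inference_make_columns_nullable = 'auto';
DESC format(JSONEachRow, '{"id" : 1, "age" : 25}');      -- no NULLs in the sample, so columns stay Int64
```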

View File

@ -70,7 +70,7 @@ SELECT '{"a" : {"b" : 42},"c" : [1, 2, 3], "d" : "Hello, World!"}'::JSON as json
└────────────────────────────────────────────────┘ └────────────────────────────────────────────────┘
``` ```
CAST from named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later. CAST from `JSON`, named `Tuple`, `Map` and `Object('json')` to `JSON` type will be supported later.
## Reading JSON paths as subcolumns ## Reading JSON paths as subcolumns

View File

@ -6,7 +6,7 @@ title: "Functions for Working with Geohash"
## Geohash ## Geohash
[Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earths surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer is the geohash string, the more precise is the geographic location. [Geohash](https://en.wikipedia.org/wiki/Geohash) is the geocode system, which subdivides Earths surface into buckets of grid shape and encodes each cell into a short string of letters and digits. It is a hierarchical data structure, so the longer the geohash string is, the more precise the geographic location will be.
If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/). If you need to manually convert geographic coordinates to geohash strings, you can use [geohash.org](http://geohash.org/).
@ -14,26 +14,37 @@ If you need to manually convert geographic coordinates to geohash strings, you c
Encodes latitude and longitude as a [geohash](#geohash)-string. Encodes latitude and longitude as a [geohash](#geohash)-string.
**Syntax**
``` sql ``` sql
geohashEncode(longitude, latitude, [precision]) geohashEncode(longitude, latitude, [precision])
``` ```
**Input values** **Input values**
- longitude - longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]` - `longitude` — Longitude part of the coordinate you want to encode. Floating in range`[-180°, 180°]`. [Float](../../data-types/float.md).
- latitude - latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]` - `latitude` — Latitude part of the coordinate you want to encode. Floating in range `[-90°, 90°]`. [Float](../../data-types/float.md).
- precision - Optional, length of the resulting encoded string, defaults to `12`. Integer in range `[1, 12]`. Any value less than `1` or greater than `12` is silently converted to `12`. - `precision` (optional) — Length of the resulting encoded string. Defaults to `12`. Integer in the range `[1, 12]`. [Int8](../../data-types/int-uint.md).
:::note
- All coordinate parameters must be of the same type: either `Float32` or `Float64`.
- For the `precision` parameter, any value less than `1` or greater than `12` is silently converted to `12`.
:::
**Returned values** **Returned values**
- alphanumeric `String` of encoded coordinate (modified version of the base32-encoding alphabet is used). - Alphanumeric string of the encoded coordinate (modified version of the base32-encoding alphabet is used). [String](../../data-types/string.md).
**Example** **Example**
Query:
``` sql ``` sql
SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res; SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res;
``` ```
Result:
``` text ``` text
┌─res──────────┐ ┌─res──────────┐
│ ezs42d000000 │ │ ezs42d000000 │
@ -44,13 +55,19 @@ SELECT geohashEncode(-5.60302734375, 42.593994140625, 0) AS res;
Decodes any [geohash](#geohash)-encoded string into longitude and latitude. Decodes any [geohash](#geohash)-encoded string into longitude and latitude.
**Syntax**
```sql
geohashDecode(hash_str)
```
**Input values** **Input values**
- encoded string - geohash-encoded string. - `hash_str` — Geohash-encoded string.
**Returned values** **Returned values**
- (longitude, latitude) - 2-tuple of `Float64` values of longitude and latitude. - Tuple `(longitude, latitude)` of `Float64` values of longitude and latitude. [Tuple](../../data-types/tuple.md)([Float64](../../data-types/float.md))
**Example** **Example**
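A minimal usage sketch (geohash decoding returns the center of the cell, so the coordinates below are approximate):

```sql
SELECT geohashDecode('ezs42') AS res;
-- res is a (longitude, latitude) tuple of Float64, roughly (-5.603, 42.605)
```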

View File

@ -688,6 +688,40 @@ SELECT kostikConsistentHash(16045690984833335023, 2);
└───────────────────────────────────────────────┘ └───────────────────────────────────────────────┘
``` ```
## ripeMD160
Produces a [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash value.
**Syntax**
```sql
ripeMD160(input)
```
**Parameters**
- `input`: Input string. [String](../data-types/string.md)
**Returned value**
- A [UInt256](../data-types/int-uint.md) hash value where the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded.
**Example**
Use the [hex](../functions/encoding-functions.md/#hex) function to represent the result as a hex-encoded string.
Query:
```sql
SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
```
```response
┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
│ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
└───────────────────────────────────────────────────────────────┘
```
## murmurHash2_32, murmurHash2_64 ## murmurHash2_32, murmurHash2_64
Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value. Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value.

View File

@ -22,18 +22,26 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su
### From deb packages {#install-from-deb-packages} ### From deb packages {#install-from-deb-packages}
Yandex recommends using the official compiled `deb` packages for Debian or Ubuntu. To install the packages, run: It is recommended to use the official compiled `deb` packages for Debian or Ubuntu. To install the packages, run:
``` bash ``` bash
sudo apt-get install -y apt-transport-https ca-certificates dirmngr sudo apt-get install -y apt-transport-https ca-certificates curl gnupg
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg
echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \ echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \
/etc/apt/sources.list.d/clickhouse.list /etc/apt/sources.list.d/clickhouse.list
sudo apt-get update sudo apt-get update
```
#### Installing ClickHouse server and client
```bash
sudo apt-get install -y clickhouse-server clickhouse-client sudo apt-get install -y clickhouse-server clickhouse-client
```
#### Starting the ClickHouse server
```bash
sudo service clickhouse-server start sudo service clickhouse-server start
clickhouse-client # or "clickhouse-client --password" if you've set up a password. clickhouse-client # or "clickhouse-client --password" if you've set up a password.
``` ```
@ -55,7 +63,7 @@ clickhouse-client # or "clickhouse-client --password" if you've set up a passwor
::: :::
### From rpm packages {#from-rpm-packages} ### From rpm packages {#from-rpm-packages}
The ClickHouse team at Yandex recommends using the official precompiled `rpm` packages for CentOS, RedHat, and all other rpm-based Linux distributions. The ClickHouse team recommends using the official precompiled `rpm` packages for CentOS, RedHat, and all other rpm-based Linux distributions.
#### Installing the official repository #### Installing the official repository
@ -102,7 +110,7 @@ sudo yum install clickhouse-server clickhouse-client
### From tgz archives {#from-tgz-archives} ### From tgz archives {#from-tgz-archives}
The ClickHouse team at Yandex recommends using the precompiled binaries from `tgz` archives for all distributions where installation of `deb` and `rpm` packages is not possible. The ClickHouse team recommends using the precompiled binaries from `tgz` archives for all distributions where installation of `deb` and `rpm` packages is not possible.
The desired version of the archives can be downloaded manually with `curl` or `wget` from the repository https://packages.clickhouse.com/tgz/. The desired version of the archives can be downloaded manually with `curl` or `wget` from the repository https://packages.clickhouse.com/tgz/.
After that, unpack the archives and use the installation scripts. Example of installing the latest version: After that, unpack the archives and use the installation scripts. Example of installing the latest version:

View File

@ -124,6 +124,40 @@ SELECT hex(sipHash128('foo', '\x01', 3));
└──────────────────────────────────┘ └──────────────────────────────────┘
``` ```
## ripeMD160
Produces the [RIPEMD-160](https://en.wikipedia.org/wiki/RIPEMD) hash of a string.
**Syntax**
```sql
ripeMD160(input)
```
**Arguments**
- `input`: Input string. [String](../data-types/string.md)
**Returned value**
- A [UInt256](../data-types/int-uint.md) value in which the 160-bit RIPEMD-160 hash is stored in the first 20 bytes. The remaining 12 bytes are zero-padded.
**Example**
Use the [hex](../functions/encoding-functions.md#hex) function to represent the result as a hex-encoded string.
Query:
```sql
SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));
```
Result:
```response
┌─hex(ripeMD160('The quick brown fox jumps over the lazy dog'))─┐
│ 37F332F68DB77BD9D7EDD4969571AD671CF9DD3B │
└───────────────────────────────────────────────────────────────┘
```
## cityHash64 {#cityhash64} ## cityHash64 {#cityhash64}
Produces a 64-bit [CityHash](https://github.com/google/cityhash) hash value. Produces a 64-bit [CityHash](https://github.com/google/cityhash) hash value.

View File

@ -120,7 +120,7 @@ void RoleCache::collectEnabledRoles(EnabledRoles & enabled_roles, SubscriptionsO
SubscriptionsOnRoles new_subscriptions_on_roles; SubscriptionsOnRoles new_subscriptions_on_roles;
new_subscriptions_on_roles.reserve(subscriptions_on_roles.size()); new_subscriptions_on_roles.reserve(subscriptions_on_roles.size());
auto get_role_function = [this, &subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, subscriptions_on_roles); }; auto get_role_function = [this, &new_subscriptions_on_roles](const UUID & id) TSA_NO_THREAD_SAFETY_ANALYSIS { return getRole(id, new_subscriptions_on_roles); };
for (const auto & current_role : enabled_roles.params.current_roles) for (const auto & current_role : enabled_roles.params.current_roles)
collectRoles(*new_info, skip_ids, get_role_function, current_role, true, false); collectRoles(*new_info, skip_ids, get_role_function, current_role, true, false);

View File

@ -692,7 +692,7 @@ QueryTreeNodePtr IdentifierResolver::tryResolveIdentifierFromStorage(
result_column_node = it->second; result_column_node = it->second;
} }
/// Check if it's a dynamic subcolumn /// Check if it's a dynamic subcolumn
else else if (table_expression_data.supports_subcolumns)
{ {
auto [column_name, dynamic_subcolumn_name] = Nested::splitName(identifier_full_name); auto [column_name, dynamic_subcolumn_name] = Nested::splitName(identifier_full_name);
auto jt = table_expression_data.column_name_to_column_node.find(column_name); auto jt = table_expression_data.column_name_to_column_node.find(column_name);

View File

@ -4379,7 +4379,10 @@ void QueryAnalyzer::initializeTableExpressionData(const QueryTreeNodePtr & table
auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals(); auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals();
if (storage_snapshot->storage.supportsSubcolumns()) if (storage_snapshot->storage.supportsSubcolumns())
{
get_column_options.withSubcolumns(); get_column_options.withSubcolumns();
table_expression_data.supports_subcolumns = true;
}
auto column_names_and_types = storage_snapshot->getColumns(get_column_options); auto column_names_and_types = storage_snapshot->getColumns(get_column_options);
table_expression_data.column_names_and_types = NamesAndTypes(column_names_and_types.begin(), column_names_and_types.end()); table_expression_data.column_names_and_types = NamesAndTypes(column_names_and_types.begin(), column_names_and_types.end());

View File

@ -36,6 +36,7 @@ struct AnalysisTableExpressionData
std::string database_name; std::string database_name;
std::string table_name; std::string table_name;
bool should_qualify_columns = true; bool should_qualify_columns = true;
bool supports_subcolumns = false;
NamesAndTypes column_names_and_types; NamesAndTypes column_names_and_types;
ColumnNameToColumnNodeMap column_name_to_column_node; ColumnNameToColumnNodeMap column_name_to_column_node;
std::unordered_set<std::string> subcolumn_names; /// Subset columns that are subcolumns of other columns std::unordered_set<std::string> subcolumn_names; /// Subset columns that are subcolumns of other columns
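The new `supports_subcolumns` flag gates dynamic-subcolumn resolution on storages that actually support subcolumns; the kind of query it concerns looks roughly like this (a sketch; `Dynamic` is experimental and the table name is illustrative):

```sql
SET allow_experimental_dynamic_type = 1;
CREATE TABLE t (d Dynamic) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t VALUES (42), ('hello');
-- Typed subcolumns of the Dynamic column are resolved by the analyzer:
SELECT d.Int64, d.String FROM t;
```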

View File

@ -145,6 +145,9 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
/// work we need to pass host name separately. It will be send into TLS Hello packet to let /// work we need to pass host name separately. It will be send into TLS Hello packet to let
/// the server know which host we want to talk with (single IP can process requests for multiple hosts using SNI). /// the server know which host we want to talk with (single IP can process requests for multiple hosts using SNI).
static_cast<Poco::Net::SecureStreamSocket*>(socket.get())->setPeerHostName(host); static_cast<Poco::Net::SecureStreamSocket*>(socket.get())->setPeerHostName(host);
/// we want to postpone SSL handshake until first read or write operation
/// so any errors during negotiation would be properly processed
static_cast<Poco::Net::SecureStreamSocket*>(socket.get())->setLazyHandshake(true);
#else #else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "tcp_secure protocol is disabled because poco library was built without NetSSL support."); throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "tcp_secure protocol is disabled because poco library was built without NetSSL support.");
#endif #endif

View File

@ -299,13 +299,14 @@ ReplxxLineReader::ReplxxLineReader(
Patterns delimiters_, Patterns delimiters_,
const char word_break_characters_[], const char word_break_characters_[],
replxx::Replxx::highlighter_callback_t highlighter_, replxx::Replxx::highlighter_callback_t highlighter_,
[[ maybe_unused ]] std::istream & input_stream_, std::istream & input_stream_,
[[ maybe_unused ]] std::ostream & output_stream_, std::ostream & output_stream_,
[[ maybe_unused ]] int in_fd_, int in_fd_,
[[ maybe_unused ]] int out_fd_, int out_fd_,
[[ maybe_unused ]] int err_fd_ int err_fd_
) )
: LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_), input_stream_, output_stream_, in_fd_) : LineReader(history_file_path_, multiline_, std::move(extenders_), std::move(delimiters_), input_stream_, output_stream_, in_fd_)
, rx(input_stream_, output_stream_, in_fd_, out_fd_, err_fd_)
, highlighter(std::move(highlighter_)) , highlighter(std::move(highlighter_))
, word_break_characters(word_break_characters_) , word_break_characters(word_break_characters_)
, editor(getEditor()) , editor(getEditor())
@ -516,7 +517,7 @@ void ReplxxLineReader::addToHistory(const String & line)
rx.history_add(line); rx.history_add(line);
// flush changes to the disk // flush changes to the disk
if (!rx.history_save(history_file_path)) if (history_file_fd >= 0 && !rx.history_save(history_file_path))
rx.print("Saving history failed: %s\n", errnoToString().c_str()); rx.print("Saving history failed: %s\n", errnoToString().c_str());
if (history_file_fd >= 0 && locked && 0 != flock(history_file_fd, LOCK_UN)) if (history_file_fd >= 0 && locked && 0 != flock(history_file_fd, LOCK_UN))

View File

@ -1181,13 +1181,14 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
/// Check if the number of all dynamic types exceeds the limit. /// Check if the number of all dynamic types exceeds the limit.
if (!canAddNewVariants(0, all_variants.size())) if (!canAddNewVariants(0, all_variants.size()))
{ {
/// Create list of variants with their sizes and sort it. /// Create a list of variants with their sizes and names and then sort it.
std::vector<std::pair<size_t, DataTypePtr>> variants_with_sizes; std::vector<std::tuple<size_t, String, DataTypePtr>> variants_with_sizes;
variants_with_sizes.reserve(all_variants.size()); variants_with_sizes.reserve(all_variants.size());
for (const auto & variant : all_variants) for (const auto & variant : all_variants)
{ {
if (variant->getName() != getSharedVariantTypeName()) auto variant_name = variant->getName();
variants_with_sizes.emplace_back(total_sizes[variant->getName()], variant); if (variant_name != getSharedVariantTypeName())
variants_with_sizes.emplace_back(total_sizes[variant_name], variant_name, variant);
} }
std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater()); std::sort(variants_with_sizes.begin(), variants_with_sizes.end(), std::greater());
@ -1196,14 +1197,14 @@ void ColumnDynamic::takeDynamicStructureFromSourceColumns(const Columns & source
result_variants.reserve(max_dynamic_types + 1); /// +1 for shared variant. result_variants.reserve(max_dynamic_types + 1); /// +1 for shared variant.
/// Add shared variant. /// Add shared variant.
result_variants.push_back(getSharedVariantDataType()); result_variants.push_back(getSharedVariantDataType());
for (const auto & [size, variant] : variants_with_sizes) for (const auto & [size, variant_name, variant_type] : variants_with_sizes)
{ {
/// Add variant to the resulting variants list until we reach max_dynamic_types. /// Add variant to the resulting variants list until we reach max_dynamic_types.
if (canAddNewVariant(result_variants.size())) if (canAddNewVariant(result_variants.size()))
result_variants.push_back(variant); result_variants.push_back(variant_type);
/// Add all remaining variants into shared_variants_statistics until we reach its max size. /// Add all remaining variants into shared_variants_statistics until we reach its max size.
else if (new_statistics.shared_variants_statistics.size() < Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE) else if (new_statistics.shared_variants_statistics.size() < Statistics::MAX_SHARED_VARIANT_STATISTICS_SIZE)
new_statistics.shared_variants_statistics[variant->getName()] = size; new_statistics.shared_variants_statistics[variant_name] = size;
else else
break; break;
} }

View File

@ -127,7 +127,7 @@ std::string ColumnObject::getName() const
{ {
WriteBufferFromOwnString ss; WriteBufferFromOwnString ss;
ss << "Object("; ss << "Object(";
ss << "max_dynamic_paths=" << max_dynamic_paths; ss << "max_dynamic_paths=" << global_max_dynamic_paths;
ss << ", max_dynamic_types=" << max_dynamic_types; ss << ", max_dynamic_types=" << max_dynamic_types;
std::vector<String> sorted_typed_paths; std::vector<String> sorted_typed_paths;
sorted_typed_paths.reserve(typed_paths.size()); sorted_typed_paths.reserve(typed_paths.size());
@ -1045,9 +1045,9 @@ void ColumnObject::forEachSubcolumnRecursively(DB::IColumn::RecursiveMutableColu
bool ColumnObject::structureEquals(const IColumn & rhs) const bool ColumnObject::structureEquals(const IColumn & rhs) const
{ {
/// 2 Object columns have equal structure if they have the same typed paths and max_dynamic_paths/max_dynamic_types. /// 2 Object columns have equal structure if they have the same typed paths and global_max_dynamic_paths/max_dynamic_types.
const auto * rhs_object = typeid_cast<const ColumnObject *>(&rhs); const auto * rhs_object = typeid_cast<const ColumnObject *>(&rhs);
if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() || max_dynamic_paths != rhs_object->max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types) if (!rhs_object || typed_paths.size() != rhs_object->typed_paths.size() || global_max_dynamic_paths != rhs_object->global_max_dynamic_paths || max_dynamic_types != rhs_object->max_dynamic_types)
return false; return false;
for (const auto & [path, column] : typed_paths) for (const auto & [path, column] : typed_paths)

View File

@ -953,7 +953,7 @@ ColumnPtr ColumnVariant::index(const IColumn & indexes, size_t limit) const
{ {
/// If we have only NULLs, index will take no effect, just return resized column. /// If we have only NULLs, index will take no effect, just return resized column.
if (hasOnlyNulls()) if (hasOnlyNulls())
return cloneResized(limit); return cloneResized(limit == 0 ? indexes.size(): limit);
/// Optimization when we have only one non empty variant and no NULLs. /// Optimization when we have only one non empty variant and no NULLs.
/// In this case local_discriminators column is filled with identical values and offsets column /// In this case local_discriminators column is filled with identical values and offsets column
@ -1009,8 +1009,16 @@ ColumnPtr ColumnVariant::indexImpl(const PaddedPODArray<Type> & indexes, size_t
new_variants.reserve(num_variants); new_variants.reserve(num_variants);
for (size_t i = 0; i != num_variants; ++i) for (size_t i = 0; i != num_variants; ++i)
{ {
size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 0 : nested_perms[i].size(); /// Check if no values from this variant were selected.
new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit)); if (nested_perms[i].empty())
{
new_variants.emplace_back(variants[i]->cloneEmpty());
}
else
{
size_t nested_limit = nested_perms[i].size() == variants[i]->size() ? 0 : nested_perms[i].size();
new_variants.emplace_back(variants[i]->permute(nested_perms[i], nested_limit));
}
} }
/// We cannot use new_offsets column as an offset column, because it became invalid after variants permutation. /// We cannot use new_offsets column as an offset column, because it became invalid after variants permutation.

View File

@ -273,6 +273,25 @@ void SystemLogBase<LogElement>::startup()
saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { savingThreadFunction(); }); saving_thread = std::make_unique<ThreadFromGlobalPool>([this] { savingThreadFunction(); });
} }
template <typename LogElement>
void SystemLogBase<LogElement>::stopFlushThread()
{
{
std::lock_guard lock(thread_mutex);
if (!saving_thread || !saving_thread->joinable())
return;
if (is_shutdown)
return;
is_shutdown = true;
queue->shutdown();
}
saving_thread->join();
}
template <typename LogElement> template <typename LogElement>
void SystemLogBase<LogElement>::add(LogElement element) void SystemLogBase<LogElement>::add(LogElement element)
{ {

View File

@ -216,6 +216,8 @@ public:
static consteval bool shouldTurnOffLogger() { return false; } static consteval bool shouldTurnOffLogger() { return false; }
protected: protected:
void stopFlushThread() final;
std::shared_ptr<SystemLogQueue<LogElement>> queue; std::shared_ptr<SystemLogQueue<LogElement>> queue;
}; };
} }

View File

@ -1120,7 +1120,7 @@ class IColumn;
M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \ M(String, column_names_for_schema_inference, "", "The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...'", 0) \
M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \ M(String, schema_inference_hints, "", "The list of column names and types to use in schema inference for formats without column names. The format: 'column_name1 column_type1, column_name2 column_type2, ...'", 0) \
M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \ M(SchemaInferenceMode, schema_inference_mode, "default", "Mode of schema inference. 'default' - assume that all files have the same schema and schema can be inferred from any file, 'union' - files can have different schemas and the resulting schema should be the a union of schemas of all files", 0) \
M(Bool, schema_inference_make_columns_nullable, true, "If set to true, all inferred types will be Nullable in schema inference for formats without information about nullability.", 0) \ M(UInt64Auto, schema_inference_make_columns_nullable, 1, "If set to true, all inferred types will be Nullable in schema inference. When set to false, no columns will be converted to Nullable. When set to 'auto', ClickHouse will use information about nullability from the data.", 0) \
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \ M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \ M(Bool, input_format_json_read_bools_as_strings, true, "Allow to parse bools as strings in JSON input formats", 0) \
M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \ M(Bool, input_format_json_try_infer_numbers_from_strings, false, "Try to infer numbers from string fields while schema inference", 0) \

View File

@ -72,11 +72,13 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"24.9", {"24.9",
{ {
{"input_format_try_infer_variants", false, false, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"}, {"input_format_try_infer_variants", false, false, "Try to infer Variant type in text formats when there is more than one possible type for column/array elements"},
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
{"create_if_not_exists", false, false, "New setting."},
{"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
} }
}, },
{"24.8", {"24.8",
{ {
{"create_if_not_exists", false, false, "New setting."},
{"rows_before_aggregation", true, true, "Provide exact value for rows_before_aggregation statistic, represents the number of rows read before aggregation"}, {"rows_before_aggregation", true, true, "Provide exact value for rows_before_aggregation statistic, represents the number of rows read before aggregation"},
{"restore_replace_external_table_functions_to_null", false, false, "New setting."}, {"restore_replace_external_table_functions_to_null", false, false, "New setting."},
{"restore_replace_external_engines_to_null", false, false, "New setting."}, {"restore_replace_external_engines_to_null", false, false, "New setting."},
@ -85,7 +87,6 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"use_hive_partitioning", false, false, "Allows to use hive partitioning for File, URL, S3, AzureBlobStorage and HDFS engines."}, {"use_hive_partitioning", false, false, "Allows to use hive partitioning for File, URL, S3, AzureBlobStorage and HDFS engines."},
{"allow_experimental_kafka_offsets_storage_in_keeper", false, false, "Allow the usage of experimental Kafka storage engine that stores the committed offsets in ClickHouse Keeper"}, {"allow_experimental_kafka_offsets_storage_in_keeper", false, false, "Allow the usage of experimental Kafka storage engine that stores the committed offsets in ClickHouse Keeper"},
{"allow_archive_path_syntax", true, true, "Added new setting to allow disabling archive path syntax."}, {"allow_archive_path_syntax", true, true, "Added new setting to allow disabling archive path syntax."},
{"allow_materialized_view_with_bad_select", true, true, "Support (but not enable yet) stricter validation in CREATE MATERIALIZED VIEW"},
{"query_cache_tag", "", "", "New setting for labeling query cache settings."}, {"query_cache_tag", "", "", "New setting for labeling query cache settings."},
{"allow_experimental_time_series_table", false, false, "Added new setting to allow the TimeSeries table engine"}, {"allow_experimental_time_series_table", false, false, "Added new setting to allow the TimeSeries table engine"},
{"enable_analyzer", 1, 1, "Added an alias to a setting `allow_experimental_analyzer`."}, {"enable_analyzer", 1, 1, "Added an alias to a setting `allow_experimental_analyzer`."},
@ -93,7 +94,6 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"allow_experimental_json_type", false, false, "Add new experimental JSON type"}, {"allow_experimental_json_type", false, false, "Add new experimental JSON type"},
{"use_json_alias_for_old_object_type", true, false, "Use JSON type alias to create new JSON type"}, {"use_json_alias_for_old_object_type", true, false, "Use JSON type alias to create new JSON type"},
{"type_json_skip_duplicated_paths", false, false, "Allow to skip duplicated paths during JSON parsing"}, {"type_json_skip_duplicated_paths", false, false, "Allow to skip duplicated paths during JSON parsing"},
{"join_output_by_rowlist_perkey_rows_threshold", 0, 5, "The lower limit of per-key average rows in the right table to determine whether to output by row list in hash join."},
{"allow_experimental_vector_similarity_index", false, false, "Added new setting to allow experimental vector similarity indexes"}, {"allow_experimental_vector_similarity_index", false, false, "Added new setting to allow experimental vector similarity indexes"},
{"input_format_try_infer_datetimes_only_datetime64", true, false, "Allow to infer DateTime instead of DateTime64 in data formats"} {"input_format_try_infer_datetimes_only_datetime64", true, false, "Allow to infer DateTime instead of DateTime64 in data formats"}
} }
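The entries recorded here surface at runtime in the `system.settings_changes` table; a quick way to inspect what changed between releases (a sketch):

```sql
SELECT version, changes
FROM system.settings_changes
WHERE version IN ('24.8', '24.9')
FORMAT Vertical;
```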

View File

@ -257,7 +257,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se
format_settings.max_bytes_to_read_for_schema_inference = settings.input_format_max_bytes_to_read_for_schema_inference; format_settings.max_bytes_to_read_for_schema_inference = settings.input_format_max_bytes_to_read_for_schema_inference;
format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference; format_settings.column_names_for_schema_inference = settings.column_names_for_schema_inference;
format_settings.schema_inference_hints = settings.schema_inference_hints; format_settings.schema_inference_hints = settings.schema_inference_hints;
format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable; format_settings.schema_inference_make_columns_nullable = settings.schema_inference_make_columns_nullable.valueOr(2);
format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name; format_settings.mysql_dump.table_name = settings.input_format_mysql_dump_table_name;
format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names; format_settings.mysql_dump.map_column_names = settings.input_format_mysql_dump_map_column_names;
format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size; format_settings.sql_insert.max_batch_size = settings.output_format_sql_insert_max_batch_size;

View File

@ -77,7 +77,7 @@ struct FormatSettings
Raw Raw
}; };
bool schema_inference_make_columns_nullable = true; UInt64 schema_inference_make_columns_nullable = 1;
DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple; DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple;

View File

@ -1344,7 +1344,11 @@ namespace
if (checkCharCaseInsensitive('n', buf)) if (checkCharCaseInsensitive('n', buf))
{ {
if (checkStringCaseInsensitive("ull", buf)) if (checkStringCaseInsensitive("ull", buf))
return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>()); {
if (settings.schema_inference_make_columns_nullable == 0)
return std::make_shared<DataTypeNothing>();
return makeNullable(std::make_shared<DataTypeNothing>());
}
else if (checkStringCaseInsensitive("an", buf)) else if (checkStringCaseInsensitive("an", buf))
return std::make_shared<DataTypeFloat64>(); return std::make_shared<DataTypeFloat64>();
} }

View File

@ -19,7 +19,9 @@
#include <Common/HashTable/Hash.h> #include <Common/HashTable/Hash.h>
#if USE_SSL #if USE_SSL
# include <openssl/evp.h>
# include <openssl/md5.h> # include <openssl/md5.h>
# include <openssl/ripemd.h>
#endif #endif
#include <bit> #include <bit>
@@ -93,9 +95,9 @@ namespace impl
 if (is_const)
 i = 0;
 assert(key0->size() == key1->size());
-if (offsets != nullptr)
+if (offsets != nullptr && i > 0)
 {
-const auto * const begin = offsets->begin();
+const auto * const begin = std::upper_bound(offsets->begin(), offsets->end(), i - 1);
 const auto * upper = std::upper_bound(begin, offsets->end(), i);
 if (upper != offsets->end())
 i = upper - begin;
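
The hunk above relies on mapping a flat element index back to the row that owns it through the column's offsets array. A minimal standalone sketch of that upper_bound-on-offsets technique (illustrative only; plain STL containers, not the ClickHouse column classes):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main()
    {
        // Rows {10, 20}, {30}, {40, 50, 60} flattened; offsets store cumulative end positions.
        std::vector<size_t> offsets = {2, 3, 6};

        for (size_t i = 0; i < 6; ++i)
        {
            // The first offset strictly greater than i identifies the owning row.
            const auto it = std::upper_bound(offsets.begin(), offsets.end(), i);
            std::cout << "element " << i << " belongs to row " << (it - offsets.begin()) << '\n';
        }
        return 0;
    }
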
@@ -196,6 +198,34 @@ T combineHashesFunc(T t1, T t2)
 return HashFunction::apply(reinterpret_cast<const char *>(hashes), sizeof(hashes));
 }
+#if USE_SSL
+struct RipeMD160Impl
+{
+static constexpr auto name = "ripeMD160";
+using ReturnType = UInt256;
+static UInt256 apply(const char * begin, size_t size)
+{
+UInt8 digest[RIPEMD160_DIGEST_LENGTH];
+RIPEMD160(reinterpret_cast<const unsigned char *>(begin), size, reinterpret_cast<unsigned char *>(digest));
+std::reverse(digest, digest + RIPEMD160_DIGEST_LENGTH);
+UInt256 res = 0;
+std::memcpy(&res, digest, RIPEMD160_DIGEST_LENGTH);
+return res;
+}
+static UInt256 combineHashes(UInt256 h1, UInt256 h2)
+{
+return combineHashesFunc<UInt256, RipeMD160Impl>(h1, h2);
+}
+static constexpr bool use_int_hash_for_pods = false;
+};
+#endif
 struct SipHash64Impl
 {
@@ -1624,6 +1654,7 @@ using FunctionIntHash32 = FunctionIntHash<IntHash32Impl, NameIntHash32>;
 using FunctionIntHash64 = FunctionIntHash<IntHash64Impl, NameIntHash64>;
 #if USE_SSL
 using FunctionHalfMD5 = FunctionAnyHash<HalfMD5Impl>;
+using FunctionRipeMD160Hash = FunctionAnyHash<RipeMD160Impl>;
 #endif
 using FunctionSipHash128 = FunctionAnyHash<SipHash128Impl>;
 using FunctionSipHash128Keyed = FunctionAnyHash<SipHash128KeyedImpl, true, SipHash128KeyedImpl::Key, SipHash128KeyedImpl::KeyColumns>;
@@ -1652,6 +1683,7 @@ using FunctionXxHash64 = FunctionAnyHash<ImplXxHash64>;
 using FunctionXXH3 = FunctionAnyHash<ImplXXH3>;
 using FunctionWyHash64 = FunctionAnyHash<ImplWyHash64>;
 }
 #pragma clang diagnostic pop

View File

@@ -0,0 +1,23 @@
+#include "FunctionsHashing.h"
+#include <Functions/FunctionFactory.h>
+/// FunctionsHashing instantiations are separated into files FunctionsHashing*.cpp
+/// to better parallelize the build procedure and avoid MSan build failure
+/// due to excessive resource consumption.
+namespace DB
+{
+#if USE_SSL
+REGISTER_FUNCTION(HashingRipe)
+{
+factory.registerFunction<FunctionRipeMD160Hash>(FunctionDocumentation{
+.description = "RIPEMD-160 hash function, primarily used in Bitcoin address generation.",
+.examples{{"", "SELECT hex(ripeMD160('The quick brown fox jumps over the lazy dog'));", R"(
+hex(ripeMD160('The quick brown fox jumps over the lazy dog'))
+37F332F68DB77BD9D7EDD4969571AD671CF9DD3B
+)"}},
+.categories{"Hash"}});
+}
+#endif
+}
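
The new ripeMD160 function wraps OpenSSL's one-shot RIPEMD160() call shown in the hunks above. A minimal standalone sketch of the same call (illustrative only; it assumes OpenSSL development headers and linking with -lcrypto, and note that RIPEMD-160 sits in OpenSSL 3.x's deprecated/legacy API, so availability can vary by build):

    #include <openssl/ripemd.h>
    #include <cstdio>
    #include <cstring>

    int main()
    {
        const char * text = "The quick brown fox jumps over the lazy dog";
        unsigned char digest[RIPEMD160_DIGEST_LENGTH];

        // One-shot RIPEMD-160 over the input bytes; the digest is 20 bytes long.
        RIPEMD160(reinterpret_cast<const unsigned char *>(text), std::strlen(text), digest);

        // Print as hex; should match the documentation example above.
        for (int i = 0; i < RIPEMD160_DIGEST_LENGTH; ++i)
            std::printf("%02X", digest[i]);
        std::printf("\n");
        return 0;
    }
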

View File

@@ -1598,6 +1598,9 @@ ColumnPtr FunctionArrayElement::executeTuple(const ColumnsWithTypeAndName & argu
 const auto & tuple_columns = col_nested->getColumns();
 size_t tuple_size = tuple_columns.size();
+if (tuple_size == 0)
+return ColumnTuple::create(input_rows_count);
 const DataTypes & tuple_types = typeid_cast<const DataTypeTuple &>(
 *typeid_cast<const DataTypeArray &>(*arguments[0].type).getNestedType()).getElements();

View File

@@ -787,7 +787,7 @@ S3CredentialsProviderChain::S3CredentialsProviderChain(
 /// EC2MetadataService delay is in order of seconds so it only make sense to retry after a couple of seconds.
 /// But the connection timeout should be small because there is the case when there is no IMDS at all,
 /// like outside of the cloud, on your own machines.
-aws_client_configuration.connectTimeoutMs = 10;
+aws_client_configuration.connectTimeoutMs = 50;
 aws_client_configuration.requestTimeoutMs = 1000;
 aws_client_configuration.retryStrategy = std::make_shared<Aws::Client::DefaultRetryStrategy>(1, 1000);

View File

@@ -701,7 +701,6 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
 col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_deflate_qpl_codec, enable_zstd_qat_codec);
 }
-column.statistics.column_name = column.name; /// We assign column name here for better exception error message.
 if (col_decl.statistics_desc)
 {
 if (!skip_checks && !context_->getSettingsRef().allow_experimental_statistics)

View File

@@ -10,7 +10,7 @@ void PeriodicLog<LogElement>::startCollect(size_t collect_interval_milliseconds_
 {
 collect_interval_milliseconds = collect_interval_milliseconds_;
 is_shutdown_metric_thread = false;
-flush_thread = std::make_unique<ThreadFromGlobalPool>([this] { threadFunction(); });
+collecting_thread = std::make_unique<ThreadFromGlobalPool>([this] { threadFunction(); });
 }
 template <typename LogElement>
@@ -19,15 +19,15 @@ void PeriodicLog<LogElement>::stopCollect()
 bool old_val = false;
 if (!is_shutdown_metric_thread.compare_exchange_strong(old_val, true))
 return;
-if (flush_thread)
-flush_thread->join();
+if (collecting_thread)
+collecting_thread->join();
 }
 template <typename LogElement>
 void PeriodicLog<LogElement>::shutdown()
 {
 stopCollect();
-this->stopFlushThread();
+Base::shutdown();
 }
 template <typename LogElement>
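
The stopCollect() path above stays safe to call more than once because only the caller that wins the compare_exchange_strong on is_shutdown_metric_thread goes on to join the thread. A minimal standalone sketch of that idempotent-shutdown pattern (illustrative only; plain std::thread instead of ThreadFromGlobalPool):

    #include <atomic>
    #include <chrono>
    #include <iostream>
    #include <thread>

    struct Collector
    {
        std::atomic<bool> is_shutdown{false};
        std::thread worker;

        void start()
        {
            worker = std::thread([this]
            {
                while (!is_shutdown.load())
                    std::this_thread::sleep_for(std::chrono::milliseconds(10));
            });
        }

        void stop()
        {
            bool expected = false;
            // Only the first caller flips the flag; later callers return immediately.
            if (!is_shutdown.compare_exchange_strong(expected, true))
                return;
            if (worker.joinable())
                worker.join();
            std::cout << "stopped once\n";
        }
    };

    int main()
    {
        Collector c;
        c.start();
        c.stop();
        c.stop(); // Safe: the second call is a no-op.
    }
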

View File

@@ -17,6 +17,7 @@ template <typename LogElement>
 class PeriodicLog : public SystemLog<LogElement>
 {
 using SystemLog<LogElement>::SystemLog;
+using Base = SystemLog<LogElement>;
 public:
 using TimePoint = std::chrono::system_clock::time_point;
@@ -24,18 +25,18 @@
 /// Launches a background thread to collect metrics with interval
 void startCollect(size_t collect_interval_milliseconds_);
-/// Stop background thread
-void stopCollect();
 void shutdown() final;
 protected:
+/// Stop background thread
+void stopCollect();
 virtual void stepFunction(TimePoint current_time) = 0;
 private:
 void threadFunction();
-std::unique_ptr<ThreadFromGlobalPool> flush_thread;
+std::unique_ptr<ThreadFromGlobalPool> collecting_thread;
 size_t collect_interval_milliseconds;
 std::atomic<bool> is_shutdown_metric_thread{false};
 };

View File

@@ -402,32 +402,13 @@ SystemLog<LogElement>::SystemLog(
 template <typename LogElement>
 void SystemLog<LogElement>::shutdown()
 {
-stopFlushThread();
+Base::stopFlushThread();
 auto table = DatabaseCatalog::instance().tryGetTable(table_id, getContext());
 if (table)
 table->flushAndShutdown();
 }
-template <typename LogElement>
-void SystemLog<LogElement>::stopFlushThread()
-{
-{
-std::lock_guard lock(thread_mutex);
-if (!saving_thread || !saving_thread->joinable())
-return;
-if (is_shutdown)
-return;
-is_shutdown = true;
-queue->shutdown();
-}
-saving_thread->join();
-}
 template <typename LogElement>
 void SystemLog<LogElement>::savingThreadFunction()

View File

@@ -125,8 +125,6 @@ public:
 void shutdown() override;
-void stopFlushThread() override;
 /** Creates new table if it does not exist.
 * Renames old table if its structure is not suitable.
 * This cannot be done in constructor to avoid deadlock while renaming a table under locked Context when SystemLog object is created.
@@ -136,9 +134,6 @@
 protected:
 LoggerPtr log;
-using ISystemLog::is_shutdown;
-using ISystemLog::saving_thread;
-using ISystemLog::thread_mutex;
 using Base::queue;
 StoragePtr getStorage() const;

View File

@@ -54,13 +54,8 @@ void checkFinalInferredType(
 type = default_type;
 }
-if (settings.schema_inference_make_columns_nullable)
+if (settings.schema_inference_make_columns_nullable == 1)
 type = makeNullableRecursively(type);
-/// In case when data for some column could contain nulls and regular values,
-/// resulting inferred type is Nullable.
-/// If input_format_null_as_default is enabled, we should remove Nullable type.
-else if (settings.null_as_default)
-type = removeNullable(type);
 }
 void ISchemaReader::transformTypesIfNeeded(DB::DataTypePtr & type, DB::DataTypePtr & new_type)

View File

@@ -204,8 +204,11 @@ NamesAndTypesList ArrowSchemaReader::readSchema()
 schema = file_reader->schema();
 auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
-*schema, stream ? "ArrowStream" : "Arrow", format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference);
-if (format_settings.schema_inference_make_columns_nullable)
+*schema,
+stream ? "ArrowStream" : "Arrow",
+format_settings.arrow.skip_columns_with_unsupported_types_in_schema_inference,
+format_settings.schema_inference_make_columns_nullable != 0);
+if (format_settings.schema_inference_make_columns_nullable == 1)
 return getNamesAndRecursivelyNullableTypes(header);
 return header.getNamesAndTypesList();
 }

View File

@@ -727,6 +727,7 @@ struct ReadColumnFromArrowColumnSettings
 FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior;
 bool allow_arrow_null_type;
 bool skip_columns_with_unsupported_types;
+bool allow_inferring_nullable_columns;
 };
 static ColumnWithTypeAndName readColumnFromArrowColumn(
@@ -1109,7 +1110,7 @@
 bool is_map_nested_column,
 const ReadColumnFromArrowColumnSettings & settings)
 {
-bool read_as_nullable_column = arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable());
+bool read_as_nullable_column = (arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable())) && settings.allow_inferring_nullable_columns;
 if (read_as_nullable_column &&
 arrow_column->type()->id() != arrow::Type::LIST &&
 arrow_column->type()->id() != arrow::Type::LARGE_LIST &&
@@ -1173,14 +1174,16 @@ static std::shared_ptr<arrow::ChunkedArray> createArrowColumn(const std::shared_
 Block ArrowColumnToCHColumn::arrowSchemaToCHHeader(
 const arrow::Schema & schema,
 const std::string & format_name,
-bool skip_columns_with_unsupported_types)
+bool skip_columns_with_unsupported_types,
+bool allow_inferring_nullable_columns)
 {
 ReadColumnFromArrowColumnSettings settings
 {
 .format_name = format_name,
 .date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore,
 .allow_arrow_null_type = false,
-.skip_columns_with_unsupported_types = skip_columns_with_unsupported_types
+.skip_columns_with_unsupported_types = skip_columns_with_unsupported_types,
+.allow_inferring_nullable_columns = allow_inferring_nullable_columns,
 };
 ColumnsWithTypeAndName sample_columns;
@@ -1254,7 +1257,8 @@ Chunk ArrowColumnToCHColumn::arrowColumnsToCHChunk(const NameToArrowColumn & nam
 .format_name = format_name,
 .date_time_overflow_behavior = date_time_overflow_behavior,
 .allow_arrow_null_type = true,
-.skip_columns_with_unsupported_types = false
+.skip_columns_with_unsupported_types = false,
+.allow_inferring_nullable_columns = true
 };
 Columns columns;

View File

@@ -34,7 +34,8 @@ public:
 static Block arrowSchemaToCHHeader(
 const arrow::Schema & schema,
 const std::string & format_name,
-bool skip_columns_with_unsupported_types = false);
+bool skip_columns_with_unsupported_types = false,
+bool allow_inferring_nullable_columns = true);
 struct DictionaryInfo
 {

View File

@@ -15,8 +15,8 @@ namespace ErrorCodes
 }
 template <bool with_defaults>
-BinaryRowInputFormat<with_defaults>::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
-: RowInputFormatWithNamesAndTypes(
+BinaryRowInputFormat<with_defaults>::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, IRowInputFormat::Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_)
+: RowInputFormatWithNamesAndTypes<BinaryFormatReader<with_defaults>>(
 header,
 in_,
 params_,

View File

@@ -10,13 +10,16 @@ namespace DB
 class ReadBuffer;
+template <bool>
+class BinaryFormatReader;
 /** A stream for inputting data in a binary line-by-line format.
 */
 template <bool with_defaults = false>
-class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes
+class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes<BinaryFormatReader<with_defaults>>
 {
 public:
-BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_);
+BinaryRowInputFormat(ReadBuffer & in_, const Block & header, IRowInputFormat::Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_);
 String getName() const override { return "BinaryRowInputFormat"; }

View File

@@ -61,7 +61,7 @@ CSVRowInputFormat::CSVRowInputFormat(
 bool with_names_,
 bool with_types_,
 const FormatSettings & format_settings_,
-std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_)
+std::unique_ptr<CSVFormatReader> format_reader_)
 : RowInputFormatWithNamesAndTypes(
 header_,
 *in_,

View File

@@ -1,7 +1,6 @@
 #pragma once
 #include <optional>
-#include <unordered_map>
 #include <Core/Block.h>
 #include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
@@ -13,10 +12,12 @@
 namespace DB
 {
+class CSVFormatReader;
 /** A stream for inputting data in csv format.
 * Does not conform with https://tools.ietf.org/html/rfc4180 because it skips spaces and tabs between values.
 */
-class CSVRowInputFormat : public RowInputFormatWithNamesAndTypes
+class CSVRowInputFormat : public RowInputFormatWithNamesAndTypes<CSVFormatReader>
 {
 public:
 /** with_names - in the first line the header with column names
@@ -32,7 +33,7 @@ public:
 protected:
 CSVRowInputFormat(const Block & header_, std::shared_ptr<PeekableReadBuffer> in_, const Params & params_,
-bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_);
+bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr<CSVFormatReader> format_reader_);
 CSVRowInputFormat(const Block & header_, std::shared_ptr<PeekableReadBuffer> in_buf_, const Params & params_,
 bool with_names_, bool with_types_, const FormatSettings & format_settings_);

View File

@@ -9,7 +9,8 @@
 namespace DB
 {
-class CustomSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes
+class CustomSeparatedFormatReader;
+class CustomSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes<CustomSeparatedFormatReader>
 {
 public:
 CustomSeparatedRowInputFormat(

View File

@@ -11,7 +11,7 @@ namespace DB
 {
 class ReadBuffer;
+class JSONCompactEachRowFormatReader;
 /** A stream for reading data in a bunch of formats:
 * - JSONCompactEachRow
@@ -20,7 +20,7 @@ class ReadBuffer;
 * - JSONCompactStringsEachRowWithNamesAndTypes
 *
 */
-class JSONCompactEachRowRowInputFormat final : public RowInputFormatWithNamesAndTypes
+class JSONCompactEachRowRowInputFormat final : public RowInputFormatWithNamesAndTypes<JSONCompactEachRowFormatReader>
 {
 public:
 JSONCompactEachRowRowInputFormat(

View File

@@ -14,7 +14,7 @@ namespace ErrorCodes
 JSONCompactRowInputFormat::JSONCompactRowInputFormat(
 const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_)
-: RowInputFormatWithNamesAndTypes(
+: RowInputFormatWithNamesAndTypes<JSONCompactFormatReader>(
 header_, in_, params_, false, false, false, format_settings_, std::make_unique<JSONCompactFormatReader>(in_, format_settings_))
 {
 }

View File

@@ -5,8 +5,8 @@
 namespace DB
 {
+class JSONCompactFormatReader;
-class JSONCompactRowInputFormat final : public RowInputFormatWithNamesAndTypes
+class JSONCompactRowInputFormat final : public RowInputFormatWithNamesAndTypes<JSONCompactFormatReader>
 {
 public:
 JSONCompactRowInputFormat(const Block & header_, ReadBuffer & in_, Params params_, const FormatSettings & format_settings_);

View File

@@ -1002,7 +1002,7 @@ NamesAndTypesList NativeORCSchemaReader::readSchema()
 header.insert(ColumnWithTypeAndName{type, name});
 }
-if (format_settings.schema_inference_make_columns_nullable)
+if (format_settings.schema_inference_make_columns_nullable == 1)
 return getNamesAndRecursivelyNullableTypes(header);
 return header.getNamesAndTypesList();
 }

View File

@@ -160,8 +160,11 @@ NamesAndTypesList ORCSchemaReader::readSchema()
 {
 initializeIfNeeded();
 auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
-*schema, "ORC", format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference);
-if (format_settings.schema_inference_make_columns_nullable)
+*schema,
+"ORC",
+format_settings.orc.skip_columns_with_unsupported_types_in_schema_inference,
+format_settings.schema_inference_make_columns_nullable != 0);
+if (format_settings.schema_inference_make_columns_nullable == 1)
 return getNamesAndRecursivelyNullableTypes(header);
 return header.getNamesAndTypesList();
 }

View File

@@ -869,8 +869,11 @@ NamesAndTypesList ParquetSchemaReader::readSchema()
 THROW_ARROW_NOT_OK(parquet::arrow::FromParquetSchema(metadata->schema(), &schema));
 auto header = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
-*schema, "Parquet", format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference);
-if (format_settings.schema_inference_make_columns_nullable)
+*schema,
+"Parquet",
+format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference,
+format_settings.schema_inference_make_columns_nullable != 0);
+if (format_settings.schema_inference_make_columns_nullable == 1)
 return getNamesAndRecursivelyNullableTypes(header);
 return header.getNamesAndTypesList();
 }

View File

@@ -10,9 +10,11 @@
 namespace DB
 {
+class TabSeparatedFormatReader;
 /** A stream to input data in tsv format.
 */
-class TabSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes
+class TabSeparatedRowInputFormat final : public RowInputFormatWithNamesAndTypes<TabSeparatedFormatReader>
 {
 public:
 /** with_names - the first line is the header with the names of the columns

View File

@@ -1,14 +1,20 @@
-#include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
-#include <Processors/Formats/ISchemaReader.h>
-#include <DataTypes/DataTypeNothing.h>
-#include <DataTypes/DataTypeNullable.h>
 #include <DataTypes/DataTypeFactory.h>
 #include <DataTypes/DataTypeLowCardinality.h>
-#include <IO/ReadHelpers.h>
-#include <IO/Operators.h>
-#include <IO/ReadBufferFromString.h>
-#include <IO/PeekableReadBuffer.h>
+#include <DataTypes/DataTypeNothing.h>
+#include <DataTypes/DataTypeNullable.h>
 #include <Formats/EscapingRuleUtils.h>
+#include <IO/Operators.h>
+#include <IO/PeekableReadBuffer.h>
+#include <IO/ReadBufferFromString.h>
+#include <IO/ReadHelpers.h>
+#include <Processors/Formats/ISchemaReader.h>
+#include <Processors/Formats/Impl/BinaryRowInputFormat.h>
+#include <Processors/Formats/Impl/CSVRowInputFormat.h>
+#include <Processors/Formats/Impl/CustomSeparatedRowInputFormat.h>
+#include <Processors/Formats/Impl/HiveTextRowInputFormat.h>
+#include <Processors/Formats/Impl/JSONCompactRowInputFormat.h>
+#include <Processors/Formats/Impl/TabSeparatedRowInputFormat.h>
+#include <Processors/Formats/RowInputFormatWithNamesAndTypes.h>
 namespace DB
@@ -44,7 +50,8 @@ namespace
 }
 }
-RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes(
+template <typename FormatReaderImpl>
+RowInputFormatWithNamesAndTypes<FormatReaderImpl>::RowInputFormatWithNamesAndTypes(
 const Block & header_,
 ReadBuffer & in_,
 const Params & params_,
@@ -52,7 +59,7 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes(
 bool with_names_,
 bool with_types_,
 const FormatSettings & format_settings_,
-std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_,
+std::unique_ptr<FormatReaderImpl> format_reader_,
 bool try_detect_header_)
 : RowInputFormatWithDiagnosticInfo(header_, in_, params_)
 , format_settings(format_settings_)
@@ -66,7 +73,8 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes(
 column_indexes_by_names = getPort().getHeader().getNamesToIndexesMap();
 }
-void RowInputFormatWithNamesAndTypes::readPrefix()
+template <typename FormatReaderImpl>
+void RowInputFormatWithNamesAndTypes<FormatReaderImpl>::readPrefix()
 {
 /// Search and remove BOM only in textual formats (CSV, TSV etc), not in binary ones (RowBinary*).
 /// Also, we assume that column name or type cannot contain BOM, so, if format has header,
@@ -138,7 +146,8 @@ void RowInputFormatWithNamesAndTypes::readPrefix()
 }
 }
-void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector<String> & column_names_out, std::vector<String> & type_names_out)
+template <typename FormatReaderImpl>
+void RowInputFormatWithNamesAndTypes<FormatReaderImpl>::tryDetectHeader(std::vector<String> & column_names_out, std::vector<String> & type_names_out)
 {
 auto & read_buf = getReadBuffer();
 PeekableReadBuffer * peekable_buf = dynamic_cast<PeekableReadBuffer *>(&read_buf);
@@ -201,7 +210,8 @@ void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector<String> & colu
 peekable_buf->dropCheckpoint();
 }
-bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadExtension & ext)
+template <typename FormatReaderImpl>
+bool RowInputFormatWithNamesAndTypes<FormatReaderImpl>::readRow(MutableColumns & columns, RowReadExtension & ext)
 {
 if (unlikely(end_of_stream))
 return false;
@@ -280,7 +290,8 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE
 return true;
 }
-size_t RowInputFormatWithNamesAndTypes::countRows(size_t max_block_size)
+template <typename FormatReaderImpl>
+size_t RowInputFormatWithNamesAndTypes<FormatReaderImpl>::countRows(size_t max_block_size)
 {
 if (unlikely(end_of_stream))
 return 0;
@@ -304,7 +315,8 @@ size_t RowInputFormatWithNamesAndTypes::countRows(size_t max_block_size)
 return num_rows;
 }
-void RowInputFormatWithNamesAndTypes::resetParser()
+template <typename FormatReaderImpl>
+void RowInputFormatWithNamesAndTypes<FormatReaderImpl>::resetParser()
 {
 RowInputFormatWithDiagnosticInfo::resetParser();
 column_mapping->column_indexes_for_input_fields.clear();
@@ -313,7 +325,8 @@ void RowInputFormatWithNamesAndTypes::resetParser()
 end_of_stream = false;
 }
-void RowInputFormatWithNamesAndTypes::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column)
+template <typename FormatReaderImpl>
+void RowInputFormatWithNamesAndTypes<FormatReaderImpl>::tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column)
 {
 const auto & index = column_mapping->column_indexes_for_input_fields[file_column];
 if (index)
@@ -328,7 +341,8 @@ void RowInputFormatWithNamesAndTypes::tryDeserializeField(const DataTypePtr & ty
 }
 }
-bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
+template <typename FormatReaderImpl>
+bool RowInputFormatWithNamesAndTypes<FormatReaderImpl>::parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out)
 {
 if (in->eof())
 {
@@ -374,12 +388,14 @@ bool RowInputFormatWithNamesAndTypes::parseRowAndPrintDiagnosticInfo(MutableColu
 return format_reader->parseRowEndWithDiagnosticInfo(out);
 }
-bool RowInputFormatWithNamesAndTypes::isGarbageAfterField(size_t index, ReadBuffer::Position pos)
+template <typename FormatReaderImpl>
+bool RowInputFormatWithNamesAndTypes<FormatReaderImpl>::isGarbageAfterField(size_t index, ReadBuffer::Position pos)
 {
 return format_reader->isGarbageAfterField(index, pos);
 }
-void RowInputFormatWithNamesAndTypes::setReadBuffer(ReadBuffer & in_)
+template <typename FormatReaderImpl>
+void RowInputFormatWithNamesAndTypes<FormatReaderImpl>::setReadBuffer(ReadBuffer & in_)
 {
 format_reader->setReadBuffer(in_);
 IInputFormat::setReadBuffer(in_);
@@ -582,5 +598,12 @@ void FormatWithNamesAndTypesSchemaReader::transformTypesIfNeeded(DB::DataTypePtr
 transformInferredTypesIfNeeded(type, new_type, format_settings);
 }
+template class RowInputFormatWithNamesAndTypes<JSONCompactFormatReader>;
+template class RowInputFormatWithNamesAndTypes<JSONCompactEachRowFormatReader>;
+template class RowInputFormatWithNamesAndTypes<TabSeparatedFormatReader>;
+template class RowInputFormatWithNamesAndTypes<CSVFormatReader>;
+template class RowInputFormatWithNamesAndTypes<CustomSeparatedFormatReader>;
+template class RowInputFormatWithNamesAndTypes<BinaryFormatReader<true>>;
+template class RowInputFormatWithNamesAndTypes<BinaryFormatReader<false>>;
 }
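
The explicit `template class` instantiations above keep the member-function definitions of RowInputFormatWithNamesAndTypes in the .cpp file while every supported FormatReaderImpl still links. A minimal standalone sketch of that explicit-instantiation pattern (illustrative only; Parser, CsvReader and JsonReader are made-up names, not ClickHouse types):

    // parser.h would declare:  template <typename Reader> struct Parser { void run(); };
    // parser.cpp then provides the definition plus explicit instantiations.
    #include <iostream>

    template <typename Reader>
    struct Parser
    {
        void run() { Reader{}.read(); }
    };

    struct CsvReader  { void read() { std::cout << "csv\n"; } };
    struct JsonReader { void read() { std::cout << "json\n"; } };

    // Explicit instantiation: the compiler emits Parser<CsvReader> and Parser<JsonReader>
    // here, so code that only sees the declaration can still link against them.
    template struct Parser<CsvReader>;
    template struct Parser<JsonReader>;

    int main()
    {
        Parser<CsvReader>{}.run();
        Parser<JsonReader>{}.run();
    }
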

View File

@@ -26,6 +26,7 @@ class FormatWithNamesAndTypesReader;
 /// will be compared types from header.
 /// It's important that firstly this class reads/skips names and only
 /// then reads/skips types. So you can this invariant.
+template <typename FormatReaderImpl>
 class RowInputFormatWithNamesAndTypes : public RowInputFormatWithDiagnosticInfo
 {
 protected:
@@ -41,7 +42,7 @@ protected:
 bool with_names_,
 bool with_types_,
 const FormatSettings & format_settings_,
-std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_,
+std::unique_ptr<FormatReaderImpl> format_reader_,
 bool try_detect_header_ = false);
 void resetParser() override;
@@ -70,7 +71,7 @@ private:
 bool is_header_detected = false;
 protected:
-std::unique_ptr<FormatWithNamesAndTypesReader> format_reader;
+std::unique_ptr<FormatReaderImpl> format_reader;
 Block::NameMap column_indexes_by_names;
 };

View File

@ -706,9 +706,9 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
 }
 auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
-for (const auto & stats : stats_vec)
+for (const auto & [stats_column_name, stats] : stats_vec)
 {
-metadata.columns.modify(stats.column_name,
+metadata.columns.modify(stats_column_name,
 [&](ColumnDescription & column) { column.statistics.merge(stats, column.name, column.type, if_not_exists); });
 }
 }
@@ -735,14 +735,14 @@
 {
 if (!metadata.columns.has(statistics_column_name))
 {
-throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot add statistics for column {}: this column is not found", statistics_column_name);
+throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Cannot modify statistics for column {}: this column is not found", statistics_column_name);
 }
 }
 auto stats_vec = ColumnStatisticsDescription::fromAST(statistics_decl, metadata.columns);
-for (const auto & stats : stats_vec)
+for (const auto & [stats_column_name, stats] : stats_vec)
 {
-metadata.columns.modify(stats.column_name,
+metadata.columns.modify(stats_column_name,
 [&](ColumnDescription & column) { column.statistics.assign(stats); });
 }
 }
@@ -867,8 +867,6 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
 rename_visitor.visit(column_to_modify.default_desc.expression);
 if (column_to_modify.ttl)
 rename_visitor.visit(column_to_modify.ttl);
-if (column_to_modify.name == column_name && !column_to_modify.statistics.empty())
-column_to_modify.statistics.column_name = rename_to;
 });
 }
 if (metadata.table_ttl.definition_ast)

View File

@@ -113,7 +113,15 @@ bool ColumnDescription::operator==(const ColumnDescription & other) const
 && ast_to_str(ttl) == ast_to_str(other.ttl);
 }
-void ColumnDescription::writeText(WriteBuffer & buf) const
+String formatASTStateAware(IAST & ast, IAST::FormatState & state)
+{
+WriteBufferFromOwnString buf;
+IAST::FormatSettings settings(buf, true, false);
+ast.formatImpl(settings, state, IAST::FormatStateStacked());
+return buf.str();
+}
+void ColumnDescription::writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const
 {
 /// NOTE: Serialization format is insane.
@@ -126,20 +134,21 @@ void ColumnDescription::writeText(WriteBuffer & buf) const
 writeChar('\t', buf);
 DB::writeText(DB::toString(default_desc.kind), buf);
 writeChar('\t', buf);
-writeEscapedString(queryToString(default_desc.expression), buf);
+writeEscapedString(formatASTStateAware(*default_desc.expression, state), buf);
 }
-if (!comment.empty())
+if (!comment.empty() && include_comment)
 {
 writeChar('\t', buf);
 DB::writeText("COMMENT ", buf);
-writeEscapedString(queryToString(ASTLiteral(Field(comment))), buf);
+auto ast = ASTLiteral(Field(comment));
+writeEscapedString(formatASTStateAware(ast, state), buf);
 }
 if (codec)
 {
 writeChar('\t', buf);
-writeEscapedString(queryToString(codec), buf);
+writeEscapedString(formatASTStateAware(*codec, state), buf);
 }
 if (!settings.empty())
@@ -150,21 +159,21 @@
 ASTSetQuery ast;
 ast.is_standalone = false;
 ast.changes = settings;
-writeEscapedString(queryToString(ast), buf);
+writeEscapedString(formatASTStateAware(ast, state), buf);
 DB::writeText(")", buf);
 }
 if (!statistics.empty())
 {
 writeChar('\t', buf);
-writeEscapedString(queryToString(statistics.getAST()), buf);
+writeEscapedString(formatASTStateAware(*statistics.getAST(), state), buf);
 }
 if (ttl)
 {
 writeChar('\t', buf);
 DB::writeText("TTL ", buf);
-writeEscapedString(queryToString(ttl), buf);
+writeEscapedString(formatASTStateAware(*ttl, state), buf);
 }
 writeChar('\n', buf);
@@ -209,11 +218,7 @@ void ColumnDescription::readText(ReadBuffer & buf)
 settings = col_ast->settings->as<ASTSetQuery &>().changes;
 if (col_ast->statistics_desc)
-{
 statistics = ColumnStatisticsDescription::fromColumnDeclaration(*col_ast, type);
-/// every column has name `x` here, so we have to set the name manually.
-statistics.column_name = name;
-}
 }
 else
 throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse column description");
@@ -895,16 +900,17 @@ void ColumnsDescription::resetColumnTTLs()
 }
-String ColumnsDescription::toString() const
+String ColumnsDescription::toString(bool include_comments) const
 {
 WriteBufferFromOwnString buf;
+IAST::FormatState ast_format_state;
 writeCString("columns format version: 1\n", buf);
 DB::writeText(columns.size(), buf);
 writeCString(" columns:\n", buf);
 for (const ColumnDescription & column : columns)
-column.writeText(buf);
+column.writeText(buf, ast_format_state, include_comments);
 return buf.str();
 }

View File

@@ -104,7 +104,7 @@ struct ColumnDescription
 bool operator==(const ColumnDescription & other) const;
 bool operator!=(const ColumnDescription & other) const { return !(*this == other); }
-void writeText(WriteBuffer & buf) const;
+void writeText(WriteBuffer & buf, IAST::FormatState & state, bool include_comment) const;
 void readText(ReadBuffer & buf);
 };
@@ -137,7 +137,7 @@ public:
 /// NOTE Must correspond with Nested::flatten function.
 void flattenNested(); /// TODO: remove, insert already flattened Nested columns.
-bool operator==(const ColumnsDescription & other) const { return columns == other.columns; }
+bool operator==(const ColumnsDescription & other) const { return toString(false) == other.toString(false); }
 bool operator!=(const ColumnsDescription & other) const { return !(*this == other); }
 auto begin() const { return columns.begin(); }
@@ -221,7 +221,7 @@ public:
 /// Does column has non default specified compression codec
 bool hasCompressionCodec(const String & column_name) const;
-String toString() const;
+String toString(bool include_comments = true) const;
 static ColumnsDescription parse(const String & str);
 size_t size() const

View File

@@ -444,8 +444,8 @@ StorageHive::StorageHive(
 storage_metadata.setComment(comment_);
 storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext());
-setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, getContext()));
 setInMemoryMetadata(storage_metadata);
+setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), getContext()));
 }
 void StorageHive::lazyInitialize()

View File

@@ -78,7 +78,7 @@ static ColumnsStatistics getStatisticsForColumns(
 const auto * desc = all_columns.tryGet(column.name);
 if (desc && !desc->statistics.empty())
 {
-auto statistics = MergeTreeStatisticsFactory::instance().get(desc->statistics);
+auto statistics = MergeTreeStatisticsFactory::instance().get(*desc);
 all_statistics.push_back(std::move(statistics));
 }
 }

View File

@@ -553,7 +553,7 @@ static std::set<ColumnStatisticsPtr> getStatisticsToRecalculate(const StorageMet
 {
 if (!col_desc.statistics.empty() && materialized_stats.contains(col_desc.name))
 {
-stats_to_recalc.insert(stats_factory.get(col_desc.statistics));
+stats_to_recalc.insert(stats_factory.get(col_desc));
 }
 }
 return stats_to_recalc;
@@ -1434,7 +1434,7 @@ private:
 if (ctx->materialized_statistics.contains(col.name))
 {
-stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col.statistics));
+stats_to_rewrite.push_back(MergeTreeStatisticsFactory::instance().get(col));
 }
 else
 {

View File

@@ -538,6 +538,9 @@ static StoragePtr create(const StorageFactory::Arguments & args)
 if (replica_name.empty())
 throw Exception(ErrorCodes::NO_REPLICA_NAME_GIVEN, "No replica name in config{}", verbose_help_message);
+// '\t' and '\n' will interrupt parsing 'source replica' in ReplicatedMergeTreeLogEntryData::readText
+if (replica_name.find('\t') != String::npos || replica_name.find('\n') != String::npos)
+throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must not contain '\\t' or '\\n'");
 arg_cnt = engine_args.size(); /// Update `arg_cnt` here because extractZooKeeperPathAndReplicaNameFromEngineArgs() could add arguments.
 arg_num = 2; /// zookeeper_path and replica_name together are always two arguments.

View File

@@ -94,7 +94,7 @@ StorageObjectStorage::StorageObjectStorage(
 if (sample_path.empty() && context->getSettingsRef().use_hive_partitioning)
 sample_path = getPathSample(metadata, context);
-setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context, sample_path, format_settings));
+setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context, sample_path, format_settings));
 setInMemoryMetadata(metadata);
 }

View File

@@ -68,7 +68,7 @@ StorageObjectStorageCluster::StorageObjectStorageCluster(
 if (sample_path.empty() && context_->getSettingsRef().use_hive_partitioning)
 sample_path = getPathSample(metadata, context_);
-setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.getColumns(), context_, sample_path));
+setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(metadata.columns, context_, sample_path));
 setInMemoryMetadata(metadata);
 }

View File

@@ -208,7 +208,7 @@ Chunk StorageObjectStorageSource::generate()
 .filename = &filename,
 .last_modified = object_info->metadata->last_modified,
 .etag = &(object_info->metadata->etag)
-}, getContext(), read_from_format_info.columns_description);
+}, getContext());
 const auto & partition_columns = configuration->getPartitionColumns();
 if (!partition_columns.empty() && chunk_size && chunk.hasColumns())
@@ -280,7 +280,7 @@ StorageObjectStorageSource::ReaderHolder StorageObjectStorageSource::createReade
 const std::shared_ptr<IIterator> & file_iterator,
 const ConfigurationPtr & configuration,
 const ObjectStoragePtr & object_storage,
-const ReadFromFormatInfo & read_from_format_info,
+ReadFromFormatInfo & read_from_format_info,
 const std::optional<FormatSettings> & format_settings,
 const std::shared_ptr<const KeyCondition> & key_condition_,
 const ContextPtr & context_,

View File

@@ -74,7 +74,7 @@ protected:
 const UInt64 max_block_size;
 const bool need_only_count;
 const size_t max_parsing_threads;
-const ReadFromFormatInfo read_from_format_info;
+ReadFromFormatInfo read_from_format_info;
 const std::shared_ptr<ThreadPool> create_reader_pool;
 std::shared_ptr<IIterator> file_iterator;
@@ -122,7 +122,7 @@ protected:
 const std::shared_ptr<IIterator> & file_iterator,
 const ConfigurationPtr & configuration,
 const ObjectStoragePtr & object_storage,
-const ReadFromFormatInfo & read_from_format_info,
+ReadFromFormatInfo & read_from_format_info,
 const std::optional<FormatSettings> & format_settings,
 const std::shared_ptr<const KeyCondition> & key_condition_,
 const ContextPtr & context_,

View File

@@ -524,7 +524,7 @@ Chunk ObjectStorageQueueSource::generateImpl()
 {
 .path = path,
 .size = reader.getObjectInfo()->metadata->size_bytes
-}, getContext(), read_from_format_info.columns_description);
+}, getContext());
 return chunk;
 }

View File

@@ -128,7 +128,7 @@ private:
 const std::shared_ptr<FileIterator> file_iterator;
 const ConfigurationPtr configuration;
 const ObjectStoragePtr object_storage;
-const ReadFromFormatInfo read_from_format_info;
+ReadFromFormatInfo read_from_format_info;
 const std::optional<FormatSettings> format_settings;
 const ObjectStorageQueueSettings queue_settings;
 const std::shared_ptr<ObjectStorageQueueMetadata> files_metadata;

View File

@@ -169,7 +169,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue(
 storage_metadata.setColumns(columns);
 storage_metadata.setConstraints(constraints_);
 storage_metadata.setComment(comment);
-setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_));
+setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_));
 setInMemoryMetadata(storage_metadata);
 LOG_INFO(log, "Using zookeeper path: {}", zk_path.string());

View File

@@ -58,8 +58,8 @@ IStatistics::IStatistics(const SingleStatisticsDescription & stat_)
 {
 }
-ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_)
-: stats_desc(stats_desc_)
+ColumnStatistics::ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_)
+: stats_desc(stats_desc_), column_name(column_name_)
 {
 }
@@ -176,7 +176,7 @@ String ColumnStatistics::getFileName() const
 const String & ColumnStatistics::columnName() const
 {
-return stats_desc.column_name;
+return column_name;
 }
 UInt64 ColumnStatistics::rowCount() const
@@ -227,15 +227,15 @@ void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & st
 }
 }
-ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnStatisticsDescription & stats) const
+ColumnStatisticsPtr MergeTreeStatisticsFactory::get(const ColumnDescription & column_desc) const
 {
-ColumnStatisticsPtr column_stat = std::make_shared<ColumnStatistics>(stats);
-for (const auto & [type, desc] : stats.types_to_desc)
+ColumnStatisticsPtr column_stat = std::make_shared<ColumnStatistics>(column_desc.statistics, column_desc.name);
+for (const auto & [type, desc] : column_desc.statistics.types_to_desc)
 {
 auto it = creators.find(type);
 if (it == creators.end())
 throw Exception(ErrorCodes::INCORRECT_QUERY, "Unknown statistic type '{}'. Available types: 'tdigest' 'uniq' and 'count_min'", type);
-auto stat_ptr = (it->second)(desc, stats.data_type);
+auto stat_ptr = (it->second)(desc, column_desc.type);
 column_stat->stats[type] = stat_ptr;
 }
 return column_stat;
@@ -246,7 +246,7 @@ ColumnsStatistics MergeTreeStatisticsFactory::getMany(const ColumnsDescription &
 ColumnsStatistics result;
 for (const auto & col : columns)
 if (!col.statistics.empty())
-result.push_back(get(col.statistics));
+result.push_back(get(col));
 return result;
 }

View File

@@ -54,7 +54,7 @@ using StatisticsPtr = std::shared_ptr<IStatistics>;
 class ColumnStatistics
 {
 public:
-explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_);
+explicit ColumnStatistics(const ColumnStatisticsDescription & stats_desc_, const String & column_name_);
 void serialize(WriteBuffer & buf);
 void deserialize(ReadBuffer & buf);
@@ -73,10 +73,12 @@ public:
 private:
 friend class MergeTreeStatisticsFactory;
 ColumnStatisticsDescription stats_desc;
+String column_name;
 std::map<StatisticsType, StatisticsPtr> stats;
 UInt64 rows = 0; /// the number of rows in the column
 };
+struct ColumnDescription;
 class ColumnsDescription;
 using ColumnStatisticsPtr = std::shared_ptr<ColumnStatistics>;
 using ColumnsStatistics = std::vector<ColumnStatisticsPtr>;
@@ -91,7 +93,7 @@ public:
 using Validator = std::function<void(const SingleStatisticsDescription & stats, const DataTypePtr & data_type)>;
 using Creator = std::function<StatisticsPtr(const SingleStatisticsDescription & stats, const DataTypePtr & data_type)>;
-ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const;
+ColumnStatisticsPtr get(const ColumnDescription & column_desc) const;
 ColumnsStatistics getMany(const ColumnsDescription & columns) const;
 void registerValidator(StatisticsType type, Validator validator);

View File

@ -6,7 +6,6 @@
#include <Parsers/ASTStatisticsDeclaration.h> #include <Parsers/ASTStatisticsDeclaration.h>
#include <Parsers/queryToString.h> #include <Parsers/queryToString.h>
#include <Parsers/ParserCreateQuery.h> #include <Parsers/ParserCreateQuery.h>
#include <Poco/Logger.h>
#include <Storages/ColumnsDescription.h> #include <Storages/ColumnsDescription.h>
@ -97,16 +96,13 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
{ {
chassert(merging_column_type); chassert(merging_column_type);
if (column_name.empty())
column_name = merging_column_name;
data_type = merging_column_type; data_type = merging_column_type;
for (const auto & [stats_type, stats_desc]: other.types_to_desc) for (const auto & [stats_type, stats_desc]: other.types_to_desc)
{ {
if (!if_not_exists && types_to_desc.contains(stats_type)) if (!if_not_exists && types_to_desc.contains(stats_type))
{ {
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, column_name); throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics type name {} has existed in column {}", stats_type, merging_column_name);
} }
else if (!types_to_desc.contains(stats_type)) else if (!types_to_desc.contains(stats_type))
types_to_desc.emplace(stats_type, stats_desc); types_to_desc.emplace(stats_type, stats_desc);
@ -115,9 +111,6 @@ void ColumnStatisticsDescription::merge(const ColumnStatisticsDescription & othe
void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & other) void ColumnStatisticsDescription::assign(const ColumnStatisticsDescription & other)
{ {
if (other.column_name != column_name)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot assign statistics from column {} to {}", column_name, other.column_name);
types_to_desc = other.types_to_desc; types_to_desc = other.types_to_desc;
data_type = other.data_type; data_type = other.data_type;
} }
@@ -127,7 +120,7 @@ void ColumnStatisticsDescription::clear()
 types_to_desc.clear();
 }
-std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns)
+std::vector<std::pair<String, ColumnStatisticsDescription>> ColumnStatisticsDescription::fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns)
 {
 const auto * stat_definition_ast = definition_ast->as<ASTStatisticsDeclaration>();
 if (!stat_definition_ast)
@@ -145,7 +138,7 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
 statistics_types.emplace(stat.type, stat);
 }
-std::vector<ColumnStatisticsDescription> result;
+std::vector<std::pair<String, ColumnStatisticsDescription>> result;
 result.reserve(stat_definition_ast->columns->children.size());
 for (const auto & column_ast : stat_definition_ast->columns->children)
@@ -157,10 +150,9 @@ std::vector<ColumnStatisticsDescription> ColumnStatisticsDescription::fromAST(co
 throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect column name {}", physical_column_name);
 const auto & column = columns.getPhysical(physical_column_name);
-stats.column_name = column.name;
 stats.data_type = column.type;
 stats.types_to_desc = statistics_types;
-result.push_back(stats);
+result.emplace_back(physical_column_name, stats);
 }
 if (result.empty())
@@ -175,14 +167,13 @@ ColumnStatisticsDescription ColumnStatisticsDescription::fromColumnDeclaration(c
 if (stat_type_list_ast->children.empty())
 throw Exception(ErrorCodes::INCORRECT_QUERY, "We expect at least one statistics type for column {}", queryToString(column));
 ColumnStatisticsDescription stats;
-stats.column_name = column.name;
 for (const auto & ast : stat_type_list_ast->children)
 {
 const auto & stat_type = ast->as<const ASTFunction &>().name;
 SingleStatisticsDescription stat(stringToStatisticsType(Poco::toLower(stat_type)), ast->clone());
 if (stats.types_to_desc.contains(stat.type))
-throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", stats.column_name, stat_type);
+throw Exception(ErrorCodes::INCORRECT_QUERY, "Column {} already contains statistics type {}", column.name, stat_type);
 stats.types_to_desc.emplace(stat.type, std::move(stat));
 }
 stats.data_type = data_type;

View File

@@ -55,12 +55,12 @@ struct ColumnStatisticsDescription
 ASTPtr getAST() const;
-static std::vector<ColumnStatisticsDescription> fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns);
+/// get a vector of <column name, statistics desc> pair
+static std::vector<std::pair<String, ColumnStatisticsDescription>> fromAST(const ASTPtr & definition_ast, const ColumnsDescription & columns);
 static ColumnStatisticsDescription fromColumnDeclaration(const ASTColumnDeclaration & column, DataTypePtr data_type);
 using StatisticsTypeDescMap = std::map<StatisticsType, SingleStatisticsDescription>;
 StatisticsTypeDescMap types_to_desc;
-String column_name;
 DataTypePtr data_type;
 };
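Since the column name is no longer stored inside ColumnStatisticsDescription, callers of fromAST now receive it alongside each description. A self-contained sketch of the new shape, using simplified stand-in types rather than the actual ClickHouse classes:

```cpp
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Simplified stand-in for ColumnStatisticsDescription after this change:
// no column_name member, only the per-type descriptions and the data type.
struct StatsDescription
{
    std::map<std::string, std::string> types_to_desc;   // e.g. "tdigest" -> its definition
    std::string data_type;
};

int main()
{
    // fromAST now returns a vector of <column name, statistics description> pairs,
    // so the name travels next to the description instead of inside it.
    std::vector<std::pair<std::string, StatsDescription>> parsed = {
        {"a", {{{"tdigest", "TDigest"}}, "Float64"}},
        {"b", {{{"uniq", "Uniq"}}, "UInt64"}},
    };

    for (const auto & [column_name, desc] : parsed)
        std::cout << column_name << " -> " << desc.data_type << '\n';
}
```

Call sites adapt the same way: whatever previously read the name from the description now takes the first element of the pair.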

View File

@@ -290,6 +290,10 @@ VirtualColumnsDescription StorageDistributed::createVirtuals()
 desc.addEphemeral("_shard_num", std::make_shared<DataTypeUInt32>(), "Deprecated. Use function shardNum instead");
+/// Add virtual columns from table with Merge engine.
+desc.addEphemeral("_database", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "The name of database which the row comes from");
+desc.addEphemeral("_table", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "The name of table which the row comes from");
 return desc;
 }

View File

@@ -1112,9 +1112,9 @@ void StorageFile::setStorageMetadata(CommonArguments args)
 storage_metadata.setConstraints(args.constraints);
 storage_metadata.setComment(args.comment);
-setInMemoryMetadata(storage_metadata);
-setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), args.getContext(), paths.empty() ? "" : paths[0], format_settings));
+setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, args.getContext(), paths.empty() ? "" : paths[0], format_settings));
+setInMemoryMetadata(storage_metadata);
 }
@@ -1468,7 +1468,7 @@ Chunk StorageFileSource::generate()
 .size = current_file_size,
 .filename = (filename_override.has_value() ? &filename_override.value() : nullptr),
 .last_modified = current_file_last_modified
-}, getContext(), columns_description);
+}, getContext());
 return chunk;
 }

View File

@@ -60,8 +60,8 @@ StorageFileCluster::StorageFileCluster(
 }
 storage_metadata.setConstraints(constraints_);
+setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, paths.empty() ? "" : paths[0]));
 setInMemoryMetadata(storage_metadata);
-setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, paths.empty() ? "" : paths[0]));
 }
 void StorageFileCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context)

View File

@@ -642,10 +642,6 @@ std::vector<ReadFromMerge::ChildPlan> ReadFromMerge::createChildrenPlans(SelectQ
 column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name);
 }
 }
-else
-{
-}
 auto child = createPlanForTable(
 nested_storage_snaphsot,
@@ -657,6 +653,7 @@ std::vector<ReadFromMerge::ChildPlan> ReadFromMerge::createChildrenPlans(SelectQ
 row_policy_data_opt,
 modified_context,
 current_streams);
 child.plan.addInterpreterContext(modified_context);
 if (child.plan.isInitialized())
@@ -914,12 +911,14 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo
 modified_query_info.table_expression = replacement_table_expression;
 modified_query_info.planner_context->getOrCreateTableExpressionData(replacement_table_expression);
-auto get_column_options = GetColumnsOptions(GetColumnsOptions::All).withExtendedObjects().withVirtuals();
-if (storage_snapshot_->storage.supportsSubcolumns())
-get_column_options.withSubcolumns();
+auto get_column_options = GetColumnsOptions(GetColumnsOptions::All)
+.withExtendedObjects()
+.withSubcolumns(storage_snapshot_->storage.supportsSubcolumns());
 std::unordered_map<std::string, QueryTreeNodePtr> column_name_to_node;
+/// Consider only non-virtual columns of storage while checking for _table and _database columns.
+/// I.e. always override virtual columns with these names from underlying table (if any).
 if (!storage_snapshot_->tryGetColumn(get_column_options, "_table"))
 {
 auto table_name_node = std::make_shared<ConstantNode>(current_storage_id.table_name);
@@ -946,6 +945,7 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo
 column_name_to_node.emplace("_database", function_node);
 }
+get_column_options.withVirtuals();
 auto storage_columns = storage_snapshot_->metadata->getColumns();
 bool with_aliases = /* common_processed_stage == QueryProcessingStage::FetchColumns && */ !storage_columns.getAliases().empty();

View File

@@ -6340,7 +6340,7 @@ void StorageReplicatedMergeTree::alter(
 "Metadata on replica is not up to date with common metadata in Zookeeper. "
 "It means that this replica still not applied some of previous alters."
 " Probably too many alters executing concurrently (highly not recommended). "
-"You can retry the query");
+"You can retry this error");
 /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level.
 if (query_context->getZooKeeperMetadataTransaction())

View File

@@ -165,9 +165,9 @@ IStorageURLBase::IStorageURLBase(
 storage_metadata.setConstraints(constraints_);
 storage_metadata.setComment(comment);
-setInMemoryMetadata(storage_metadata);
-setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context_, getSampleURI(uri, context_), format_settings));
+setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context_, getSampleURI(uri, context_), format_settings));
+setInMemoryMetadata(storage_metadata);
 }
@@ -435,7 +435,7 @@ Chunk StorageURLSource::generate()
 {
 .path = curr_uri.getPath(),
 .size = current_file_size,
-}, getContext(), columns_description);
+}, getContext());
 return chunk;
 }

View File

@@ -75,8 +75,8 @@ StorageURLCluster::StorageURLCluster(
 }
 storage_metadata.setConstraints(constraints_);
+setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.columns, context, getSampleURI(uri, context)));
 setInMemoryMetadata(storage_metadata);
-setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns(), context, getSampleURI(uri, context)));
 }
 void StorageURLCluster::updateQueryToSendIfNeeded(ASTPtr & query, const StorageSnapshotPtr & storage_snapshot, const ContextPtr & context)

View File

@@ -129,36 +129,45 @@ NameSet getVirtualNamesForFileLikeStorage()
 return {"_path", "_file", "_size", "_time", "_etag"};
 }
-std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(const String & path, const ColumnsDescription & storage_columns)
+std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(const String & path)
 {
 std::string pattern = "([^/]+)=([^/]+)/";
 re2::StringPiece input_piece(path);
 std::unordered_map<std::string, std::string> key_values;
 std::string key, value;
-std::unordered_set<String> used_keys;
+std::unordered_map<std::string, std::string> used_keys;
 while (RE2::FindAndConsume(&input_piece, pattern, &key, &value))
 {
-if (used_keys.contains(key))
-throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {}, only unique keys are allowed", path, key);
-used_keys.insert(key);
-auto col_name = "_" + key;
-while (storage_columns.has(col_name))
-col_name = "_" + col_name;
+auto it = used_keys.find(key);
+if (it != used_keys.end() && it->second != value)
+throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {} with different values, only unique keys are allowed", path, key);
+used_keys.insert({key, value});
+auto col_name = key;
 key_values[col_name] = value;
 }
 return key_values;
 }
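As an aside, a self-contained sketch of the duplicate-key rule implemented above: repeated keys in a path are now accepted as long as every occurrence carries the same value, and keys are used as-is rather than being prefixed with `_` and renamed around storage columns. The helper name, the example path, and the use of std::regex in place of re2 are illustrative assumptions, not the ClickHouse code itself.

```cpp
#include <iostream>
#include <regex>
#include <stdexcept>
#include <string>
#include <unordered_map>

// Sketch of hive-style partition parsing after this change: duplicate keys are
// accepted when their values match and rejected only when the values differ.
std::unordered_map<std::string, std::string> parseHivePartitionKeys(const std::string & path)
{
    static const std::regex pattern("([^/]+)=([^/]+)/");
    std::unordered_map<std::string, std::string> key_values;

    for (auto it = std::sregex_iterator(path.begin(), path.end(), pattern); it != std::sregex_iterator(); ++it)
    {
        const std::string key = (*it)[1];
        const std::string value = (*it)[2];

        auto found = key_values.find(key);
        if (found != key_values.end() && found->second != value)
            throw std::runtime_error("duplicated partition key '" + key + "' with different values in path: " + path);

        key_values[key] = value;   // keys are stored as-is, without the old '_' prefix
    }
    return key_values;
}

int main()
{
    // Repeating year=2024 with the same value is accepted; a second year=2023 would throw.
    for (const auto & [k, v] : parseHivePartitionKeys("/data/year=2024/country=US/year=2024/file.parquet"))
        std::cout << k << " = " << v << '\n';
}
```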
-VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional<FormatSettings> format_settings_)
+VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional<FormatSettings> format_settings_)
 {
 VirtualColumnsDescription desc;
 auto add_virtual = [&](const auto & name, const auto & type)
 {
 if (storage_columns.has(name))
+{
+if (!context->getSettingsRef().use_hive_partitioning)
+return;
+if (storage_columns.size() == 1)
+throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot use hive partitioning for file {}: it contains only partition columns. Disable use_hive_partitioning setting to read this file", path);
+auto local_type = storage_columns.get(name).type;
+storage_columns.remove(name);
+desc.addEphemeral(name, local_type, "");
 return;
+}
 desc.addEphemeral(name, type, "");
 };
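For context on the add_virtual branch added above: when use_hive_partitioning is enabled, a physical column that shares its name with one of the file-like storage virtuals is dropped from storage_columns and re-registered as an ephemeral virtual with its original type, and a file consisting only of partition columns is rejected outright. A standalone sketch of that precedence rule under simplified stand-in types (all names hypothetical):

```cpp
#include <map>
#include <stdexcept>
#include <string>

// Simplified stand-ins for ColumnsDescription / VirtualColumnsDescription.
using Columns  = std::map<std::string, std::string>;   // column name -> type
using Virtuals = std::map<std::string, std::string>;   // virtual name -> type (ephemeral)

// Sketch of the collision handling: with hive partitioning enabled, a storage
// column that shares its name with a virtual becomes an ephemeral virtual.
void addVirtual(Columns & storage_columns, Virtuals & virtuals, bool use_hive_partitioning,
                const std::string & name, const std::string & default_type)
{
    auto it = storage_columns.find(name);
    if (it != storage_columns.end())
    {
        if (!use_hive_partitioning)
            return;                                  // keep the physical column untouched
        if (storage_columns.size() == 1)
            throw std::runtime_error("file contains only partition columns");
        virtuals.emplace(name, it->second);          // keep the column's own type
        storage_columns.erase(it);                   // its values will now come from the path
        return;
    }
    virtuals.emplace(name, default_type);
}

int main()
{
    Columns columns = {{"id", "UInt64"}, {"year", "UInt16"}};
    Virtuals virtuals;
    addVirtual(columns, virtuals, /*use_hive_partitioning=*/ true, "year", "String");
    // "year" is now an ephemeral virtual keeping its UInt16 type; "id" stays a physical column.
}
```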
@@ -171,7 +180,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription
 if (context->getSettingsRef().use_hive_partitioning)
 {
-auto map = parseHivePartitioningKeysAndValues(path, storage_columns);
+auto map = parseHivePartitioningKeysAndValues(path);
 auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context);
 for (auto & item : map)
 {
@@ -244,11 +253,11 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector<String> & paths, const
 void addRequestedFileLikeStorageVirtualsToChunk(
 Chunk & chunk, const NamesAndTypesList & requested_virtual_columns,
-VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns)
+VirtualsForFileLikeStorage virtual_values, ContextPtr context)
 {
 std::unordered_map<std::string, std::string> hive_map;
 if (context->getSettingsRef().use_hive_partitioning)
-hive_map = parseHivePartitioningKeysAndValues(virtual_values.path, columns);
+hive_map = parseHivePartitioningKeysAndValues(virtual_values.path);
 for (const auto & virtual_column : requested_virtual_columns)
 {

View File

@@ -70,7 +70,7 @@ auto extractSingleValueFromBlock(const Block & block, const String & name)
 NameSet getVirtualNamesForFileLikeStorage();
 VirtualColumnsDescription getVirtualsForFileLikeStorage(
-const ColumnsDescription & storage_columns,
+ColumnsDescription & storage_columns,
 const ContextPtr & context,
 const std::string & sample_path = "",
 std::optional<FormatSettings> format_settings_ = std::nullopt);
@@ -105,7 +105,7 @@ struct VirtualsForFileLikeStorage
 void addRequestedFileLikeStorageVirtualsToChunk(
 Chunk & chunk, const NamesAndTypesList & requested_virtual_columns,
-VirtualsForFileLikeStorage virtual_values, ContextPtr context, const ColumnsDescription & columns);
+VirtualsForFileLikeStorage virtual_values, ContextPtr context);
 }
 }

View File

@@ -333,7 +333,10 @@ def _pre_action(s3, job_name, batch, indata, pr_info):
 CI.JobNames.BUILD_CHECK,
 ): # we might want to rerun build report job
 rerun_helper = RerunHelper(commit, _get_ext_check_name(job_name))
-if rerun_helper.is_already_finished_by_status():
+if (
+rerun_helper.is_already_finished_by_status()
+and not Utils.is_job_triggered_manually()
+):
 print("WARNING: Rerunning job with GH status ")
 status = rerun_helper.get_finished_status()
 assert status
@@ -344,7 +347,7 @@ def _pre_action(s3, job_name, batch, indata, pr_info):
 skip_status = status.state
 # ci cache check
-if not to_be_skipped and not no_cache:
+if not to_be_skipped and not no_cache and not Utils.is_job_triggered_manually():
 ci_cache = CiCache(s3, indata["jobs_data"]["digests"]).update()
 job_config = CI.get_job_config(job_name)
 if ci_cache.is_successful(

View File

@@ -501,9 +501,10 @@ class CI:
 JobNames.SQLANCER_DEBUG: CommonJobConfigs.SQLLANCER_TEST.with_properties(
 required_builds=[BuildNames.PACKAGE_DEBUG],
 ),
-JobNames.SQL_LOGIC_TEST: CommonJobConfigs.SQLLOGIC_TEST.with_properties(
-required_builds=[BuildNames.PACKAGE_RELEASE],
-),
+# TODO: job does not work at all, uncomment and fix
+# JobNames.SQL_LOGIC_TEST: CommonJobConfigs.SQLLOGIC_TEST.with_properties(
+# required_builds=[BuildNames.PACKAGE_RELEASE],
+# ),
 JobNames.SQLTEST: CommonJobConfigs.SQL_TEST.with_properties(
 required_builds=[BuildNames.PACKAGE_RELEASE],
 ),

View File

@@ -204,7 +204,7 @@ class JobNames(metaclass=WithIter):
 PERFORMANCE_TEST_AMD64 = "Performance Comparison (release)"
 PERFORMANCE_TEST_ARM64 = "Performance Comparison (aarch64)"
-SQL_LOGIC_TEST = "Sqllogic test (release)"
+# SQL_LOGIC_TEST = "Sqllogic test (release)"
 SQLANCER = "SQLancer (release)"
 SQLANCER_DEBUG = "SQLancer (debug)"

View File

@@ -18,6 +18,7 @@ class Envs:
 )
 S3_BUILDS_BUCKET = os.getenv("S3_BUILDS_BUCKET", "clickhouse-builds")
 GITHUB_WORKFLOW = os.getenv("GITHUB_WORKFLOW", "")
+GITHUB_ACTOR = os.getenv("GITHUB_ACTOR", "")
 class WithIter(type):
@@ -282,3 +283,7 @@ class Utils:
 ):
 res = res.replace(*r)
 return res
+@staticmethod
+def is_job_triggered_manually():
+return "robot" not in Envs.GITHUB_ACTOR

View File

@@ -31,7 +31,7 @@ IMAGE_NAME = "clickhouse/sqllogic-test"
 def get_run_command(
 builds_path: Path,
-repo_tests_path: Path,
+repo_path: Path,
 result_path: Path,
 server_log_path: Path,
 image: DockerImage,
@@ -39,11 +39,11 @@ def get_run_command(
 return (
 f"docker run "
 f"--volume={builds_path}:/package_folder "
-f"--volume={repo_tests_path}:/clickhouse-tests "
+f"--volume={repo_path}:/repo "
 f"--volume={result_path}:/test_output "
 f"--volume={server_log_path}:/var/log/clickhouse-server "
 "--security-opt seccomp=unconfined " # required to issue io_uring sys-calls
-f"--cap-add=SYS_PTRACE {image}"
+f"--cap-add=SYS_PTRACE {image} /repo/tests/docker_scripts/sqllogic_runner.sh"
 )
@@ -94,8 +94,6 @@ def main():
 docker_image = pull_image(get_docker_image(IMAGE_NAME))
-repo_tests_path = repo_path / "tests"
 packages_path = temp_path / "packages"
 packages_path.mkdir(parents=True, exist_ok=True)
@@ -111,7 +109,7 @@ def main():
 run_command = get_run_command( # run script inside docker
 packages_path,
-repo_tests_path,
+repo_path,
 result_path,
 server_log_path,
 docker_image,

View File

@@ -60,7 +60,6 @@ MESSAGES_TO_RETRY = [
 "is already started to be removing by another replica right now",
 # This is from LSan, and it indicates its own internal problem:
 "Unable to get registers from thread",
-"You can retry",
 ]
 MAX_RETRIES = 3
@@ -3567,7 +3566,7 @@ if __name__ == "__main__":
 f"Cannot access the specified directory with queries ({args.queries})",
 file=sys.stderr,
 )
-sys.exit(1)
+assert False, "No --queries provided"
 CAPTURE_CLIENT_STACKTRACE = args.capture_client_stacktrace

Some files were not shown because too many files have changed in this diff