Merge branch 'master' into constant-folding-scalar-subqueries

This commit is contained in:
Nikolai Kochetov 2024-03-05 12:14:54 +00:00
commit d4570aa3b2
367 changed files with 5877 additions and 3037 deletions

View File

@ -142,9 +142,6 @@ jobs:
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
@ -153,9 +150,6 @@ jobs:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
@ -172,14 +166,8 @@ jobs:
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
runner_type: style-checker
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
additional_envs: |
NEEDS_DATA<<NDENV
${{ toJSON(needs) }}
NDENV
run_command: |
python3 build_report_check.py "$CHECK_NAME"
BuilderSpecialReport:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
@ -190,14 +178,8 @@ jobs:
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
runner_type: style-checker
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
additional_envs: |
NEEDS_DATA<<NDENV
${{ toJSON(needs) }}
NDENV
run_command: |
python3 build_report_check.py "$CHECK_NAME"
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################

View File

@ -253,9 +253,6 @@ jobs:
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
@ -264,9 +261,6 @@ jobs:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
@ -275,26 +269,18 @@ jobs:
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderBinRelease
- BuilderDebAarch64
- BuilderDebAsan
- BuilderDebDebug
- BuilderDebMsan
- BuilderDebRelease
- BuilderDebReleaseCoverage
- BuilderDebTsan
- BuilderDebUBsan
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
runner_type: style-checker
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
additional_envs: |
NEEDS_DATA<<NDENV
${{ toJSON(needs) }}
NDENV
run_command: |
python3 build_report_check.py "$CHECK_NAME"
BuilderSpecialReport:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
@ -311,17 +297,13 @@ jobs:
- BuilderBinAarch64V80Compat
- BuilderBinClangTidy
- BuilderBinAmd64Musl
- BuilderDebReleaseCoverage
- BuilderBinRelease
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
runner_type: style-checker
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
additional_envs: |
NEEDS_DATA<<NDENV
${{ toJSON(needs) }}
NDENV
run_command: |
python3 build_report_check.py "$CHECK_NAME"
MarkReleaseReady:
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
needs:

View File

@ -285,9 +285,6 @@ jobs:
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
@ -296,9 +293,6 @@ jobs:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
@ -307,13 +301,11 @@ jobs:
if: ${{ !cancelled() }}
needs:
- RunConfig
- BuilderBinRelease
- BuilderDebAarch64
- BuilderDebAsan
- BuilderDebDebug
- BuilderDebMsan
- BuilderDebRelease
- BuilderDebReleaseCoverage
- BuilderDebTsan
- BuilderDebUBsan
uses: ./.github/workflows/reusable_test.yml
@ -321,12 +313,6 @@ jobs:
test_name: ClickHouse build check
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
additional_envs: |
NEEDS_DATA<<NDENV
${{ toJSON(needs) }}
NDENV
run_command: |
python3 build_report_check.py "$CHECK_NAME"
BuilderSpecialReport:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
@ -342,17 +328,13 @@ jobs:
- BuilderBinAmd64Compat
- BuilderBinAarch64V80Compat
- BuilderBinClangTidy
- BuilderDebReleaseCoverage
- BuilderBinRelease
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
additional_envs: |
NEEDS_DATA<<NDENV
${{ toJSON(needs) }}
NDENV
run_command: |
python3 build_report_check.py "$CHECK_NAME"
############################################################################################
#################################### INSTALL PACKAGES ######################################
############################################################################################

View File

@ -54,10 +54,13 @@ jobs:
- name: Check docker clickhouse/clickhouse-server building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 docker_server.py --release-type auto --version "$GITHUB_TAG" \
--image-repo clickhouse/clickhouse-server --image-path docker/server
python3 docker_server.py --release-type auto --version "$GITHUB_TAG" \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper
export CHECK_NAME="Docker server image"
python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --check-name "$CHECK_NAME" --push
- name: Check docker clickhouse/clickhouse-keeper building
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
export CHECK_NAME="Docker keeper image"
python3 docker_server.py --release-type auto --version "$GITHUB_TAG" --check-name "$CHECK_NAME" --push
- name: Cleanup
if: always()
run: |

View File

@ -165,9 +165,6 @@ jobs:
test_name: Docker server image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-server --image-path docker/server --allow-build-reuse
DockerKeeperImage:
needs: [RunConfig, BuilderDebRelease, BuilderDebAarch64]
if: ${{ !failure() && !cancelled() }}
@ -176,9 +173,6 @@ jobs:
test_name: Docker keeper image
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
run_command: |
python3 docker_server.py --release-type head --no-push \
--image-repo clickhouse/clickhouse-keeper --image-path docker/keeper --allow-build-reuse
############################################################################################
##################################### BUILD REPORTER #######################################
############################################################################################
@ -197,14 +191,8 @@ jobs:
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse build check
runner_type: style-checker
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
additional_envs: |
NEEDS_DATA<<NDENV
${{ toJSON(needs) }}
NDENV
run_command: |
python3 build_report_check.py "$CHECK_NAME"
BuilderSpecialReport:
# run report check for failed builds to indicate the CI error
if: ${{ !cancelled() }}
@ -215,14 +203,8 @@ jobs:
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse special build check
runner_type: style-checker
runner_type: style-checker-aarch64
data: ${{ needs.RunConfig.outputs.data }}
additional_envs: |
NEEDS_DATA<<NDENV
${{ toJSON(needs) }}
NDENV
run_command: |
python3 build_report_check.py "$CHECK_NAME"
MarkReleaseReady:
if: ${{ ! (contains(needs.*.result, 'skipped') || contains(needs.*.result, 'failure')) }}
needs:
@ -230,7 +212,7 @@ jobs:
- BuilderBinDarwinAarch64
- BuilderDebRelease
- BuilderDebAarch64
runs-on: [self-hosted, style-checker]
runs-on: [self-hosted, style-checker-aarch64]
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1

View File

@ -22,17 +22,18 @@
* Backup & Restore support for `AzureBlobStorage`. Resolves [#50747](https://github.com/ClickHouse/ClickHouse/issues/50747). [#56988](https://github.com/ClickHouse/ClickHouse/pull/56988) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* The user can now specify the template string directly in the query using `format_schema_rows_template` as an alternative to `format_template_row`. Closes [#31363](https://github.com/ClickHouse/ClickHouse/issues/31363). [#59088](https://github.com/ClickHouse/ClickHouse/pull/59088) ([Shaun Struwig](https://github.com/Blargian)).
* Implemented automatic conversion of merge tree tables of different kinds to replicated engine. Create empty `convert_to_replicated` file in table's data directory (`/clickhouse/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/`) and that table will be converted automatically on next server start. [#57798](https://github.com/ClickHouse/ClickHouse/pull/57798) ([Kirill](https://github.com/kirillgarbar)).
* Added function `seriesOutliersTukey` to detect outliers in series data using Tukey's fences algorithm. [#58632](https://github.com/ClickHouse/ClickHouse/pull/58632) ([Bhavna Jindal](https://github.com/bhavnajindal)).
* Added query `ALTER TABLE table FORGET PARTITION partition` that removes ZooKeeper nodes, related to an empty partition. [#59507](https://github.com/ClickHouse/ClickHouse/pull/59507) ([Sergei Trifonov](https://github.com/serxa)). This is an expert-level feature.
* Support JWT credentials file for the NATS table engine. [#59543](https://github.com/ClickHouse/ClickHouse/pull/59543) ([Nickolaj Jepsen](https://github.com/nickolaj-jepsen)).
* Implemented system.dns_cache table, which can be useful for debugging DNS issues. [#59856](https://github.com/ClickHouse/ClickHouse/pull/59856) ([Kirill Nikiforov](https://github.com/allmazz)).
* Implemented `system.dns_cache` table, which can be useful for debugging DNS issues. [#59856](https://github.com/ClickHouse/ClickHouse/pull/59856) ([Kirill Nikiforov](https://github.com/allmazz)).
* The codec `LZ4HC` will accept a new level 2, which is faster than the previous minimum level 3, at the expense of less compression. In previous versions, `LZ4HC(2)` and less was the same as `LZ4HC(3)`. Author: [Cyan4973](https://github.com/Cyan4973). [#60090](https://github.com/ClickHouse/ClickHouse/pull/60090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Implemented `system.dns_cache` table, which can be useful for debugging DNS issues. New server setting dns_cache_max_size. [#60257](https://github.com/ClickHouse/ClickHouse/pull/60257) ([Kirill Nikiforov](https://github.com/allmazz)).
* Support single-argument version for the `merge` table function, as `merge(['db_name', ] 'tables_regexp')`. [#60372](https://github.com/ClickHouse/ClickHouse/pull/60372) ([豪肥肥](https://github.com/HowePa)).
* Support negative positional arguments. Closes [#57736](https://github.com/ClickHouse/ClickHouse/issues/57736). [#58292](https://github.com/ClickHouse/ClickHouse/pull/58292) ([flynn](https://github.com/ucasfl)).
* Support specifying a set of permitted users for specific S3 settings in config using `user` key. [#60144](https://github.com/ClickHouse/ClickHouse/pull/60144) ([Antonio Andelic](https://github.com/antonio2368)).
* Added table function `mergeTreeIndex`. It represents the contents of index and marks files of `MergeTree` tables. It can be used for introspection. Syntax: `mergeTreeIndex(database, table, [with_marks = true])` where `database.table` is an existing table with `MergeTree` engine. [#58140](https://github.com/ClickHouse/ClickHouse/pull/58140) ([Anton Popov](https://github.com/CurtizJ)).
#### Experimental Feature
* Added function `seriesOutliersDetectTukey` to detect outliers in series data using Tukey's fences algorithm. [#58632](https://github.com/ClickHouse/ClickHouse/pull/58632) ([Bhavna Jindal](https://github.com/bhavnajindal)). Keep in mind that the behavior will be changed in the next patch release.
* Add function `variantType` that returns Enum with variant type name for each row. [#59398](https://github.com/ClickHouse/ClickHouse/pull/59398) ([Kruglov Pavel](https://github.com/Avogar)).
* Support `LEFT JOIN`, `ALL INNER JOIN`, and simple subqueries for parallel replicas (only with analyzer). New setting `parallel_replicas_prefer_local_join` chooses local `JOIN` execution (by default) vs `GLOBAL JOIN`. All tables should exist on every replica from `cluster_for_parallel_replicas`. New settings `min_external_table_block_size_rows` and `min_external_table_block_size_bytes` are used to squash small blocks that are sent for temporary tables (only with analyzer). [#58916](https://github.com/ClickHouse/ClickHouse/pull/58916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Allow concurrent table creation in the `Replicated` database during adding or recovering a new replica. [#59277](https://github.com/ClickHouse/ClickHouse/pull/59277) ([Konstantin Bogdanov](https://github.com/thevar1able)).
@ -60,7 +61,7 @@
* Now if `StorageBuffer` has more than 1 shard (`num_layers` > 1) background flush will happen simultaneously for all shards in multiple threads. [#60111](https://github.com/ClickHouse/ClickHouse/pull/60111) ([alesapin](https://github.com/alesapin)).
#### Improvement
* When output format is Pretty format and a block consists of a single numeric value which exceeds one million, A readable number will be printed on table right. [#60379](https://github.com/ClickHouse/ClickHouse/pull/60379) ([rogeryk](https://github.com/rogeryk)).
* When output format is `Pretty` format and a block consists of a single numeric value which exceeds one million, A readable number will be printed on table right. [#60379](https://github.com/ClickHouse/ClickHouse/pull/60379) ([rogeryk](https://github.com/rogeryk)).
* Added settings `split_parts_ranges_into_intersecting_and_non_intersecting_final` and `split_intersecting_parts_ranges_into_layers_final`. These settings are needed to disable optimizations for queries with `FINAL` and needed for debug only. [#59705](https://github.com/ClickHouse/ClickHouse/pull/59705) ([Maksim Kita](https://github.com/kitaisreal)). Actually not only for that - they can also lower memory usage at the expense of performance.
* Rename the setting `extract_kvp_max_pairs_per_row` to `extract_key_value_pairs_max_pairs_per_row`. The issue (unnecessary abbreviation in the setting name) was introduced in https://github.com/ClickHouse/ClickHouse/pull/43606. Fix the documentation of this setting. [#59683](https://github.com/ClickHouse/ClickHouse/pull/59683) ([Alexey Milovidov](https://github.com/alexey-milovidov)). [#59960](https://github.com/ClickHouse/ClickHouse/pull/59960) ([jsc0218](https://github.com/jsc0218)).
* Running `ALTER COLUMN MATERIALIZE` on a column with `DEFAULT` or `MATERIALIZED` expression now precisely follows the semantics. [#58023](https://github.com/ClickHouse/ClickHouse/pull/58023) ([Duc Canh Le](https://github.com/canhld94)).

View File

@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s
| Version | Supported |
|:-|:-|
| 24.2 | ✔️ |
| 24.1 | ✔️ |
| 23.12 | ✔️ |
| 23.11 | ✔️ |
| 23.11 | |
| 23.10 | ❌ |
| 23.9 | ❌ |
| 23.8 | ✔️ |

View File

@ -1,14 +1,9 @@
#pragma once
#include <base/extended_types.h>
#include <base/Decimal_fwd.h>
#include <base/defines.h>
#if !defined(NO_SANITIZE_UNDEFINED)
#if defined(__clang__)
#define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))
#else
#define NO_SANITIZE_UNDEFINED
#endif
#endif
namespace DB
{

View File

@ -10,14 +10,10 @@
#define JSON_MAX_DEPTH 100
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec"
#endif
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec"
POCO_IMPLEMENT_EXCEPTION(JSONException, Poco::Exception, "JSONException") // NOLINT(cert-err60-cpp, modernize-use-noexcept, hicpp-use-noexcept)
#ifdef __clang__
# pragma clang diagnostic pop
#endif
#pragma clang diagnostic pop
/// Read unsigned integer in a simple form from a non-0-terminated string.

View File

@ -39,14 +39,10 @@
// NOLINTBEGIN(google-explicit-constructor)
#ifdef __clang__
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec"
#endif
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-dynamic-exception-spec"
POCO_DECLARE_EXCEPTION(Foundation_API, JSONException, Poco::Exception)
#ifdef __clang__
# pragma clang diagnostic pop
#endif
#pragma clang diagnostic pop
// NOLINTEND(google-explicit-constructor)
class JSON

View File

@ -13,11 +13,7 @@
#include <unistd.h>
# if defined(__clang__)
extern "C" void __llvm_profile_dump(); // NOLINT
# elif defined(__GNUC__) || defined(__GNUG__)
extern "C" void __gcov_exit();
# endif
#endif
@ -28,12 +24,7 @@ void dumpCoverageReportIfPossible()
static std::mutex mutex;
std::lock_guard lock(mutex);
# if defined(__clang__)
__llvm_profile_dump(); // NOLINT
# elif defined(__GNUC__) || defined(__GNUG__)
__gcov_exit();
# endif
#endif
}

View File

@ -11,7 +11,7 @@
/// including <base/defines.h>
/// - it should not have fallback to 0,
/// since this may create false-positive detection (common problem)
#if defined(__clang__) && defined(__has_feature)
#if defined(__has_feature)
# define ch_has_feature __has_feature
#endif
@ -76,24 +76,11 @@
/// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute.
/// It is useful in case when compiler cannot see (and exploit) it, but UBSan can.
/// Example: multiplication of signed integers with possibility of overflow when both sides are from user input.
#if defined(__clang__)
# define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))
# define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined")))
#else /// It does not work in GCC. GCC 7 cannot recognize this attribute and GCC 8 simply ignores it.
# define NO_SANITIZE_UNDEFINED
# define NO_SANITIZE_ADDRESS
# define NO_SANITIZE_THREAD
# define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED ALWAYS_INLINE
#endif
#if defined(__clang__) && defined(__clang_major__) && __clang_major__ >= 14
# define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation))
#else
# define DISABLE_SANITIZER_INSTRUMENTATION
#endif
#define NO_SANITIZE_UNDEFINED __attribute__((__no_sanitize__("undefined")))
#define NO_SANITIZE_ADDRESS __attribute__((__no_sanitize__("address")))
#define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
#define ALWAYS_INLINE_NO_SANITIZE_UNDEFINED __attribute__((__always_inline__, __no_sanitize__("undefined")))
#define DISABLE_SANITIZER_INSTRUMENTATION __attribute__((disable_sanitizer_instrumentation))
#if !__has_include(<sanitizer/asan_interface.h>) || !defined(ADDRESS_SANITIZER)
# define ASAN_UNPOISON_MEMORY_REGION(a, b)
@ -135,54 +122,33 @@
/// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.
/// Feel free to extend, but please stay close to https://clang.llvm.org/docs/ThreadSafetyAnalysis.html#mutexheader
#if defined(__clang__)
# define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) /// data is protected by given capability
# define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) /// pointed-to data is protected by the given capability
# define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) /// thread needs exclusive possession of given capability
# define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability
# define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock
# define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function
# define TSA_CAPABILITY(...) __attribute__((capability(__VA_ARGS__))) /// object of a class can be used as capability
# define TSA_ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__))) /// function acquires a capability, but does not release it
# define TSA_TRY_ACQUIRE(...) __attribute__((try_acquire_capability(__VA_ARGS__))) /// function tries to acquire a capability and returns a boolean value indicating success or failure
# define TSA_RELEASE(...) __attribute__((release_capability(__VA_ARGS__))) /// function releases the given capability
# define TSA_ACQUIRE_SHARED(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) /// function acquires a shared capability, but does not release it
# define TSA_TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) /// function tries to acquire a shared capability and returns a boolean value indicating success or failure
# define TSA_RELEASE_SHARED(...) __attribute__((release_shared_capability(__VA_ARGS__))) /// function releases the given shared capability
# define TSA_SCOPED_LOCKABLE __attribute__((scoped_lockable)) /// object of a class has scoped lockable capability
#define TSA_GUARDED_BY(...) __attribute__((guarded_by(__VA_ARGS__))) /// data is protected by given capability
#define TSA_PT_GUARDED_BY(...) __attribute__((pt_guarded_by(__VA_ARGS__))) /// pointed-to data is protected by the given capability
#define TSA_REQUIRES(...) __attribute__((requires_capability(__VA_ARGS__))) /// thread needs exclusive possession of given capability
#define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability
#define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock
#define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function
#define TSA_CAPABILITY(...) __attribute__((capability(__VA_ARGS__))) /// object of a class can be used as capability
#define TSA_ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__))) /// function acquires a capability, but does not release it
#define TSA_TRY_ACQUIRE(...) __attribute__((try_acquire_capability(__VA_ARGS__))) /// function tries to acquire a capability and returns a boolean value indicating success or failure
#define TSA_RELEASE(...) __attribute__((release_capability(__VA_ARGS__))) /// function releases the given capability
#define TSA_ACQUIRE_SHARED(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) /// function acquires a shared capability, but does not release it
#define TSA_TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) /// function tries to acquire a shared capability and returns a boolean value indicating success or failure
#define TSA_RELEASE_SHARED(...) __attribute__((release_shared_capability(__VA_ARGS__))) /// function releases the given shared capability
#define TSA_SCOPED_LOCKABLE __attribute__((scoped_lockable)) /// object of a class has scoped lockable capability
/// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function)
/// They use a lambda function to apply function attribute to a single statement. This enable us to suppress warnings locally instead of
/// suppressing them in the whole function
/// Consider adding a comment when using these macros.
# define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }())
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }())
#define TSA_SUPPRESS_WARNING_FOR_READ(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> const auto & { return (x); }())
#define TSA_SUPPRESS_WARNING_FOR_WRITE(x) ([&]() TSA_NO_THREAD_SAFETY_ANALYSIS -> auto & { return (x); }())
/// This macro is useful when only one thread writes to a member
/// and you want to read this member from the same thread without locking a mutex.
/// It's safe (because no concurrent writes are possible), but TSA generates a warning.
/// (Seems like there's no way to verify it, but it makes sense to distinguish it from TSA_SUPPRESS_WARNING_FOR_READ for readability)
# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x)
#else
# define TSA_GUARDED_BY(...)
# define TSA_PT_GUARDED_BY(...)
# define TSA_REQUIRES(...)
# define TSA_REQUIRES_SHARED(...)
# define TSA_NO_THREAD_SAFETY_ANALYSIS
# define TSA_CAPABILITY(...)
# define TSA_ACQUIRE(...)
# define TSA_TRY_ACQUIRE(...)
# define TSA_RELEASE(...)
# define TSA_ACQUIRE_SHARED(...)
# define TSA_TRY_ACQUIRE_SHARED(...)
# define TSA_RELEASE_SHARED(...)
# define TSA_SCOPED_LOCKABLE
# define TSA_SUPPRESS_WARNING_FOR_READ(x) (x)
# define TSA_SUPPRESS_WARNING_FOR_WRITE(x) (x)
# define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x)
#endif
#define TSA_READ_ONE_THREAD(x) TSA_SUPPRESS_WARNING_FOR_READ(x)
/// A template function for suppressing warnings about unused variables or function results.
template <typename... Args>

View File

@ -155,9 +155,7 @@ Out & dump(Out & out, const char * name, T && x) // NOLINT(cppcoreguidelines-mis
return dumpValue(out, x) << "; ";
}
#ifdef __clang__
#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
#endif
#define DUMPVAR(VAR) ::dump(std::cerr, #VAR, (VAR));
#define DUMPHEAD std::cerr << __FILE__ << ':' << __LINE__ << " [ " << getThreadId() << " ] ";

View File

@ -11,10 +11,8 @@
/// Thread Sanitizer uses dl_iterate_phdr function on initialization and fails if we provide our own.
#ifdef USE_PHDR_CACHE
#if defined(__clang__)
# pragma clang diagnostic ignored "-Wreserved-id-macro"
# pragma clang diagnostic ignored "-Wunused-macros"
#endif
#pragma clang diagnostic ignored "-Wreserved-id-macro"
#pragma clang diagnostic ignored "-Wunused-macros"
#define __msan_unpoison(X, Y) // NOLINT
#if defined(ch_has_feature)
@ -57,10 +55,6 @@ std::atomic<PHDRCache *> phdr_cache {};
extern "C"
#ifndef __clang__
[[gnu::visibility("default")]]
[[gnu::externally_visible]]
#endif
int dl_iterate_phdr(int (*callback) (dl_phdr_info * info, size_t size, void * data), void * data)
{
auto * current_phdr_cache = phdr_cache.load();

View File

@ -2,6 +2,7 @@
.hidden __syscall
.type __syscall,%function
__syscall:
.cfi_startproc
uxtw x8,w0
mov x0,x1
mov x1,x2
@ -12,3 +13,4 @@ __syscall:
mov x6,x7
svc 0
ret
.cfi_endproc

View File

@ -20,11 +20,7 @@
/// Suppress TSan since it is possible for this code to be called from multiple threads,
/// and initialization is safe to be done multiple times from multiple threads.
#if defined(__clang__)
# define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
#else
# define NO_SANITIZE_THREAD
#endif
#define NO_SANITIZE_THREAD __attribute__((__no_sanitize__("thread")))
// We don't have libc struct available here.
// Compute aux vector manually (from /proc/self/auxv).

View File

@ -6,11 +6,7 @@
/// It is only enabled in debug build (its intended use is for CI checks).
#if !defined(NDEBUG)
#if defined(__clang__)
#pragma clang diagnostic ignored "-Wincompatible-library-redeclaration"
#else
#pragma GCC diagnostic ignored "-Wbuiltin-declaration-mismatch"
#endif
#pragma clang diagnostic ignored "-Wincompatible-library-redeclaration"
/// We cannot use libc headers here.
long write(int, const void *, unsigned long);

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54483)
SET(VERSION_REVISION 54484)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 2)
SET(VERSION_MINOR 3)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 5a024dfc0936e062770d0cfaad0805b57c1fba17)
SET(VERSION_DESCRIBE v24.2.1.1-testing)
SET(VERSION_STRING 24.2.1.1)
SET(VERSION_GITHASH 891689a41506d00aa169548f5b4a8774351242c4)
SET(VERSION_DESCRIBE v24.3.1.1-testing)
SET(VERSION_STRING 24.3.1.1)
# end of autochange

2
contrib/cctz vendored

@ -1 +1 @@
Subproject commit 8529bcef5cd996b7c0f4d7475286b76b5d126c4c
Subproject commit 7918cb7afe82e53428e39a045a437fdfd4f3df47

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.1.5.6"
ARG VERSION="24.2.1.2248"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.1.5.6"
ARG VERSION="24.2.1.2248"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.1.5.6"
ARG VERSION="24.2.1.2248"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -386,7 +386,8 @@ if [ -f core.zst ]; then
CORE_LINK='<a href="core.zst">core.zst</a>'
fi
rg --text -F '<Fatal>' server.log > fatal.log ||:
# Keep all the lines in the paragraphs containing <Fatal> that either contain <Fatal> or don't start with 20... (year)
sed -n '/<Fatal>/,/^$/p' s.log | awk '/<Fatal>/ || !/^20/' server.log > fatal.log ||:
FATAL_LINK=''
if [ -s fatal.log ]; then
FATAL_LINK='<a href="fatal.log">fatal.log</a>'

View File

@ -20,6 +20,8 @@ if [ -n "$WITH_LOCAL_BINARY" ]; then
clickhouse_source="--clickhouse-source /clickhouse"
fi
# $TESTS_TO_RUN comes from docker
# shellcheck disable=SC2153
tests_count="--test-count $TESTS_TO_RUN"
tests_to_run="test-all"
workload=""
@ -47,6 +49,6 @@ fi
cd "$CLICKHOUSE_REPO_PATH/tests/jepsen.clickhouse"
(lein run server $tests_to_run $workload --keeper "$KEEPER_NODE" $concurrency $nemesis $rate --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 $clickhouse_source $tests_count --reuse-binary || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log"
(lein run server $tests_to_run "$workload" --keeper "$KEEPER_NODE" "$concurrency" "$nemesis" "$rate" --nodes-file "$NODES_FILE_PATH" --username "$NODES_USERNAME" --logging-json --password "$NODES_PASSWORD" --time-limit "$TIME_LIMIT" --concurrency 50 "$clickhouse_source" "$tests_count" --reuse-binary || true) | tee "$TEST_OUTPUT/jepsen_run_all_tests.log"
mv store "$TEST_OUTPUT/"

View File

@ -1,5 +1,6 @@
#!/bin/bash
# shellcheck source=./utils.lib
source /utils.lib
function attach_gdb_to_clickhouse()

View File

@ -57,8 +57,20 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th
sudo mv /etc/clickhouse-server/config.d/zookeeper.xml.tmp /etc/clickhouse-server/config.d/zookeeper.xml
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/handlers.yaml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
function remove_keeper_config()
{
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
| sed "/<$1>$2<\/$1>/d" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
}
# commit_logs_cache_size_threshold setting doesn't exist on some older versions
remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+"
remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+"
fi
# For flaky check we also enable thread fuzzer

View File

@ -19,7 +19,7 @@ function escaped()
function head_escaped()
{
head -n $FAILURE_CONTEXT_LINES $1 | escaped
head -n "$FAILURE_CONTEXT_LINES" "$1" | escaped
}
function unts()
@ -29,15 +29,15 @@ function unts()
function trim_server_logs()
{
head -n $FAILURE_CONTEXT_LINES "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped
head -n "$FAILURE_CONTEXT_LINES" "/test_output/$1" | grep -Eo " \[ [0-9]+ \] \{.*" | escaped
}
function install_packages()
{
dpkg -i $1/clickhouse-common-static_*.deb
dpkg -i $1/clickhouse-common-static-dbg_*.deb
dpkg -i $1/clickhouse-server_*.deb
dpkg -i $1/clickhouse-client_*.deb
dpkg -i "$1"/clickhouse-common-static_*.deb
dpkg -i "$1"/clickhouse-common-static-dbg_*.deb
dpkg -i "$1"/clickhouse-server_*.deb
dpkg -i "$1"/clickhouse-client_*.deb
}
function configure()
@ -54,11 +54,11 @@ function configure()
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
function randomize_config_boolean_value {
value=$(($RANDOM % 2))
sudo cat /etc/clickhouse-server/config.d/$2.xml \
value=$((RANDOM % 2))
sudo cat "/etc/clickhouse-server/config.d/$2.xml" \
| sed "s|<$1>[01]</$1>|<$1>$value</$1>|" \
> /etc/clickhouse-server/config.d/$2.xml.tmp
sudo mv /etc/clickhouse-server/config.d/$2.xml.tmp /etc/clickhouse-server/config.d/$2.xml
> "/etc/clickhouse-server/config.d/$2.xml.tmp"
sudo mv "/etc/clickhouse-server/config.d/$2.xml.tmp" "/etc/clickhouse-server/config.d/$2.xml"
}
if [[ -n "$RANDOMIZE_KEEPER_FEATURE_FLAGS" ]] && [[ "$RANDOMIZE_KEEPER_FEATURE_FLAGS" -eq 1 ]]; then
@ -146,17 +146,17 @@ EOL
}
function stop()
function stop_server()
{
local max_tries="${1:-90}"
local check_hang="${2:-true}"
local max_tries=90
local check_hang=true
local pid
# Preserve the pid, since the server can hung after the PID will be deleted.
pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"
clickhouse stop --max-tries "$max_tries" --do-not-kill && return
if [ $check_hang == true ]
if [ "$check_hang" == true ]
then
# We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces.
# Add a special status just in case, so it will be possible to find in the CI DB
@ -165,7 +165,7 @@ function stop()
sleep 5
# The server could finally stop while we were terminating gdb, let's recheck if it's still running
kill -s 0 $pid || return
kill -s 0 "$pid" || return
echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv
echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log
timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log
@ -176,12 +176,13 @@ function stop()
fi
}
function start()
function start_server()
{
counter=0
max_attempt=120
until clickhouse-client --query "SELECT 1"
do
if [ "$counter" -gt ${1:-120} ]
if [ "$counter" -gt "$max_attempt" ]
then
echo "Cannot start clickhouse-server"
rg --text "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||:
@ -286,9 +287,9 @@ function collect_query_and_trace_logs()
function collect_core_dumps()
{
find . -type f -maxdepth 1 -name 'core.*' | while read core; do
zstd --threads=0 $core
mv $core.zst /test_output/
find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do
zstd --threads=0 "$core"
mv "$core.zst" /test_output/
done
}

View File

@ -16,7 +16,9 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# Stress tests and upgrade check uses similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
# shellcheck source=../stateless/attach_gdb.lib
source /attach_gdb.lib
# shellcheck source=../stateless/stress_tests.lib
source /stress_tests.lib
install_packages package_folder
@ -55,7 +57,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
start
start_server
setup_logs_replication
@ -65,7 +67,7 @@ clickhouse-client --query "SHOW TABLES FROM datasets"
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
stop
stop_server
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
# Randomize cache policies.
@ -85,7 +87,7 @@ if [ "$cache_policy" = "SLRU" ]; then
mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
fi
start
start_server
clickhouse-client --query "SHOW TABLES FROM datasets"
clickhouse-client --query "SHOW TABLES FROM test"
@ -188,7 +190,7 @@ clickhouse-client --query "SHOW TABLES FROM test"
clickhouse-client --query "SYSTEM STOP THREAD FUZZER"
stop
stop_server
# Let's enable S3 storage by default
export USE_S3_STORAGE_FOR_MERGE_TREE=1
@ -222,7 +224,7 @@ if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then
> /etc/clickhouse-server/config.d/enable_async_load_databases.xml
fi
start
start_server
stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \
&& echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \
@ -232,18 +234,18 @@ stress --hung-check --drop-databases --output-folder test_output --skip-func-tes
rg -Fa "No queries hung" /test_output/test_results.tsv | grep -Fa "OK" \
|| echo -e "Hung check failed, possible deadlock found (see hung_check.log)$FAIL$(head_escaped /test_output/hung_check.log)" >> /test_output/test_results.tsv
stop
stop_server
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log
# NOTE Disable thread fuzzer before server start with data after stress test.
# In debug build it can take a lot of time.
unset "${!THREAD_@}"
start
start_server
check_server_start
stop
stop_server
[ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
[ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"
@ -272,7 +274,7 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d
(test like '%Signal 9%') DESC,
(test like '%Fatal message%') DESC,
rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo -e "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# But OOMs in stress test are allowed

View File

@ -16,7 +16,6 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
moreutils \
python3-fuzzywuzzy \
python3-pip \
shellcheck \
yamllint \
locales \
&& pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \
@ -30,6 +29,19 @@ ENV LC_ALL en_US.UTF-8
# Architecture of the image when BuildKit/buildx is used
ARG TARGETARCH
ARG SHELLCHECK_VERSION=0.9.0
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) sarch=x86_64 ;; \
arm64) sarch=aarch64 ;; \
esac \
&& curl -L \
"https://github.com/koalaman/shellcheck/releases/download/v${SHELLCHECK_VERSION}/shellcheck-v${SHELLCHECK_VERSION}.linux.${sarch}.tar.xz" \
| tar xJ --strip=1 -C /tmp \
&& mv /tmp/shellcheck /usr/bin \
&& rm -rf /tmp/*
# Get act and actionlint from releases
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \

View File

@ -16,7 +16,9 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre
# Stress tests and upgrade check uses similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
# shellcheck source=../stateless/attach_gdb.lib
source /attach_gdb.lib
# shellcheck source=../stateless/stress_tests.lib
source /stress_tests.lib
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
@ -89,6 +91,7 @@ rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xm
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
rm /etc/clickhouse-server/config.d/backoff_failed_mutation.xml
rm /etc/clickhouse-server/config.d/handlers.yaml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
@ -136,6 +139,7 @@ rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xm
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
rm /etc/clickhouse-server/config.d/backoff_failed_mutation.xml
rm /etc/clickhouse-server/config.d/handlers.yaml
rm /etc/clickhouse-server/config.d/block_number.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
@ -335,7 +339,7 @@ clickhouse-local --structure "test String, res String, time Nullable(Float32), d
(test like '%Changed settings%') DESC,
(test like '%New settings%') DESC,
rowNumberInAllBlocks()
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo -e "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv
[ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv
# But OOMs in stress test are allowed

View File

@ -0,0 +1,39 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.8.10.43-lts (a278225bba9) FIXME as compared to v23.8.9.54-lts (192a1d231fa)
#### Improvement
* Backported in [#58819](https://github.com/ClickHouse/ClickHouse/issues/58819): Add `SYSTEM JEMALLOC PURGE` for purging unused jemalloc pages, `SYSTEM JEMALLOC [ ENABLE | DISABLE | FLUSH ] PROFILE` for controlling jemalloc profile if the profiler is enabled. Add jemalloc-related 4LW command in Keeper: `jmst` for dumping jemalloc stats, `jmfp`, `jmep`, `jmdp` for controlling jemalloc profile if the profiler is enabled. [#58665](https://github.com/ClickHouse/ClickHouse/pull/58665) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#60286](https://github.com/ClickHouse/ClickHouse/issues/60286): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
#### Build/Testing/Packaging Improvement
* Backported in [#59879](https://github.com/ClickHouse/ClickHouse/issues/59879): If you want to run initdb scripts every time when ClickHouse container is starting you shoud initialize environment varible CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Background merges correctly use temporary data storage in the cache [#57275](https://github.com/ClickHouse/ClickHouse/pull/57275) ([vdimir](https://github.com/vdimir)).
* MergeTree mutations reuse source part index granularity [#57352](https://github.com/ClickHouse/ClickHouse/pull/57352) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix double destroy call on exception throw in addBatchLookupTable8 [#58745](https://github.com/ClickHouse/ClickHouse/pull/58745) ([Raúl Marín](https://github.com/Algunenano)).
* Fix JSONExtract function for LowCardinality(Nullable) columns [#58808](https://github.com/ClickHouse/ClickHouse/pull/58808) ([vdimir](https://github.com/vdimir)).
* Fix: LIMIT BY and LIMIT in distributed query [#59153](https://github.com/ClickHouse/ClickHouse/pull/59153) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)).
* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)).
* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)).
* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Fix rare race in external sort/aggregation with temporary data in cache [#58013](https://github.com/ClickHouse/ClickHouse/pull/58013) ([Anton Popov](https://github.com/CurtizJ)).
* Fix possible race in ManyAggregatedData dtor. [#58624](https://github.com/ClickHouse/ClickHouse/pull/58624) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix 02720_row_policy_column_with_dots [#59453](https://github.com/ClickHouse/ClickHouse/pull/59453) ([Duc Canh Le](https://github.com/canhld94)).
* Pin python dependencies in stateless tests [#59663](https://github.com/ClickHouse/ClickHouse/pull/59663) ([Raúl Marín](https://github.com/Algunenano)).
* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)).

View File

@ -0,0 +1,462 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.2.1.2248-stable (891689a4150) FIXME as compared to v24.1.1.2048-stable (5a024dfc093)
#### Backward Incompatible Change
* Validate suspicious/experimental types in nested types. Previously we didn't validate such types (except JSON) in nested types like Array/Tuple/Map. [#59385](https://github.com/ClickHouse/ClickHouse/pull/59385) ([Kruglov Pavel](https://github.com/Avogar)).
* The sort clause `ORDER BY ALL` (introduced with v23.12) is replaced by `ORDER BY *`. The previous syntax was too error-prone for tables with a column `all`. [#59450](https://github.com/ClickHouse/ClickHouse/pull/59450) ([Robert Schulze](https://github.com/rschu1ze)).
* Rename the setting `extract_kvp_max_pairs_per_row` to `extract_key_value_pairs_max_pairs_per_row`. The bug (unnecessary abbreviation in the setting name) was introduced in https://github.com/ClickHouse/ClickHouse/pull/43606. Fix the documentation of this setting. [#59683](https://github.com/ClickHouse/ClickHouse/pull/59683) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Rename the setting extract_kvp_max_pairs_per_row to extract_key_value_pairs_max_pairs_per_row. The bug (unnecessary abbreviation in the setting name) was introduced in https://github.com/ClickHouse/ClickHouse/pull/43606. Fix the documentation of this setting. [#59960](https://github.com/ClickHouse/ClickHouse/pull/59960) ([jsc0218](https://github.com/jsc0218)).
* Add sanity check for number of threads and block sizes. [#60138](https://github.com/ClickHouse/ClickHouse/pull/60138) ([Raúl Marín](https://github.com/Algunenano)).
#### New Feature
* Added maximum sequential login failures to the quota. [#54737](https://github.com/ClickHouse/ClickHouse/pull/54737) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
* Added new syntax which allows to specify definer user in View/Materialized View. This allows to execute selects/inserts from views without explicit grants for underlying tables. [#54901](https://github.com/ClickHouse/ClickHouse/pull/54901) ([pufit](https://github.com/pufit)).
* Backup & Restore support for AzureBlobStorage resolves [#50747](https://github.com/ClickHouse/ClickHouse/issues/50747). [#56988](https://github.com/ClickHouse/ClickHouse/pull/56988) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Implemented automatic conversion of merge tree tables of different kinds to replicated engine. Create empty `convert_to_replicated` file in table's data directory (`/clickhouse/store/xxx/xxxyyyyy-yyyy-yyyy-yyyy-yyyyyyyyyyyy/`) and that table will be converted automatically on next server start. [#57798](https://github.com/ClickHouse/ClickHouse/pull/57798) ([Kirill](https://github.com/kirillgarbar)).
* Added table function `mergeTreeIndex`. It represents the contents of index and marks files of `MergeTree` tables. It can be used for introspection. Syntax: `mergeTreeIndex(database, table, [with_marks = true])` where `database.table` is an existing table with `MergeTree` engine. [#58140](https://github.com/ClickHouse/ClickHouse/pull/58140) ([Anton Popov](https://github.com/CurtizJ)).
* Added function `seriesOutliersTukey` to detect outliers in series data using Tukey's fences algorithm. [#58632](https://github.com/ClickHouse/ClickHouse/pull/58632) ([Bhavna Jindal](https://github.com/bhavnajindal)).
* The user can now specify the template string directly in the query using `format_schema_rows_template` as an alternative to `format_template_row`. Closes [#31363](https://github.com/ClickHouse/ClickHouse/issues/31363). [#59088](https://github.com/ClickHouse/ClickHouse/pull/59088) ([Shaun Struwig](https://github.com/Blargian)).
* Try to detect file format automatically during schema inference if it's unknown in `file/s3/hdfs/url/azureBlobStorage` engines. Closes [#50576](https://github.com/ClickHouse/ClickHouse/issues/50576). [#59092](https://github.com/ClickHouse/ClickHouse/pull/59092) ([Kruglov Pavel](https://github.com/Avogar)).
* Add function variantType that returns Enum with variant type name for each row. [#59398](https://github.com/ClickHouse/ClickHouse/pull/59398) ([Kruglov Pavel](https://github.com/Avogar)).
* Added query `ALTER TABLE table FORGET PARTITION partition` that removes ZooKeeper nodes, related to an empty partition. [#59507](https://github.com/ClickHouse/ClickHouse/pull/59507) ([Sergei Trifonov](https://github.com/serxa)).
* Support JWT credentials file for the NATS table engine. [#59543](https://github.com/ClickHouse/ClickHouse/pull/59543) ([Nickolaj Jepsen](https://github.com/nickolaj-jepsen)).
* Provides new aggregate function groupArrayIntersect. Follows up: [#49862](https://github.com/ClickHouse/ClickHouse/issues/49862). [#59598](https://github.com/ClickHouse/ClickHouse/pull/59598) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Implemented system.dns_cache table, which can be useful for debugging DNS issues. [#59856](https://github.com/ClickHouse/ClickHouse/pull/59856) ([Kirill Nikiforov](https://github.com/allmazz)).
* The codec `LZ4HC` will accept a new level 2, which is faster than the previous minimum level 3, at the expense of less compression. In previous versions, `LZ4HC(2)` and less was the same as `LZ4HC(3)`. Author: [Cyan4973](https://github.com/Cyan4973). [#60090](https://github.com/ClickHouse/ClickHouse/pull/60090) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Implemented system.dns_cache table, which can be useful for debugging DNS issues. New server setting dns_cache_max_size. [#60257](https://github.com/ClickHouse/ClickHouse/pull/60257) ([Kirill Nikiforov](https://github.com/allmazz)).
* Support single-argument version for the merge table function, as `merge(['db_name', ] 'tables_regexp')`. [#60372](https://github.com/ClickHouse/ClickHouse/pull/60372) ([豪肥肥](https://github.com/HowePa)).
* Added new syntax which allows to specify definer user in View/Materialized View. This allows to execute selects/inserts from views without explicit grants for underlying tables. [#60439](https://github.com/ClickHouse/ClickHouse/pull/60439) ([pufit](https://github.com/pufit)).
#### Performance Improvement
* Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section. [#52230](https://github.com/ClickHouse/ClickHouse/pull/52230) ([JackyWoo](https://github.com/JackyWoo)).
* Vectorized distance functions used in vector search. [#58866](https://github.com/ClickHouse/ClickHouse/pull/58866) ([Robert Schulze](https://github.com/rschu1ze)).
* Continue optimizing branch miss of if function when result type is float*/decimal*/int* , follow up of https://github.com/ClickHouse/ClickHouse/pull/57885. [#59148](https://github.com/ClickHouse/ClickHouse/pull/59148) ([李扬](https://github.com/taiyang-li)).
* Optimize if function when input type is map, speed up by ~10x. [#59413](https://github.com/ClickHouse/ClickHouse/pull/59413) ([李扬](https://github.com/taiyang-li)).
* Improve performance of Int8 type by implementing strict aliasing. [#59485](https://github.com/ClickHouse/ClickHouse/pull/59485) ([Raúl Marín](https://github.com/Algunenano)).
* Optimize performance of sum/avg conditionally for bigint and big decimal types by reducing branch miss. [#59504](https://github.com/ClickHouse/ClickHouse/pull/59504) ([李扬](https://github.com/taiyang-li)).
* Improve performance of SELECTs with active mutations. [#59531](https://github.com/ClickHouse/ClickHouse/pull/59531) ([Azat Khuzhin](https://github.com/azat)).
* Optimized function `isNotNull` with AVX2. [#59621](https://github.com/ClickHouse/ClickHouse/pull/59621) ([李扬](https://github.com/taiyang-li)).
* Reuse the result of `FunctionFactory::instance().get("isNotNull", context)` and `FunctionFactory::instance().get("assumeNotNull", context)`. Make sure it is called once during the lifetime of `FunctionCoalesce`. [#59627](https://github.com/ClickHouse/ClickHouse/pull/59627) ([李扬](https://github.com/taiyang-li)).
* Improve ASOF JOIN performance for sorted or almost sorted data. [#59731](https://github.com/ClickHouse/ClickHouse/pull/59731) ([Maksim Kita](https://github.com/kitaisreal)).
* Primary key will use less amount of memory. [#60049](https://github.com/ClickHouse/ClickHouse/pull/60049) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Improve memory usage for primary key and some other operations. [#60050](https://github.com/ClickHouse/ClickHouse/pull/60050) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The tables' primary keys will be loaded in memory lazily on first access. This is controlled by the new MergeTree setting `primary_key_lazy_load`, which is on by default. This provides several advantages: - it will not be loaded for tables that are not used; - if there is not enough memory, an exception will be thrown on first use instead of at server startup. This provides several disadvantages: - the latency of loading the primary key will be paid on the first query rather than before accepting connections; this theoretically may introduce a thundering-herd problem. This closes [#11188](https://github.com/ClickHouse/ClickHouse/issues/11188). [#60093](https://github.com/ClickHouse/ClickHouse/pull/60093) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Vectorized function `dotProduct` which is useful for vector search. [#60202](https://github.com/ClickHouse/ClickHouse/pull/60202) ([Robert Schulze](https://github.com/rschu1ze)).
* As is shown in Fig 1, the replacement of "&&" with "&" could generate the SIMD code. ![image](https://github.com/ClickHouse/ClickHouse/assets/26588299/a5a72ac4-6dc6-4d52-835a-4f512e55f0b9) Fig 1. Code compiled from '&&' (left) and '&' (right). [#60498](https://github.com/ClickHouse/ClickHouse/pull/60498) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
#### Improvement
* Added support for parameterized view with analyzer to not analyze create parameterized view. Refactor existing parameterized view logic to not analyze create parameterized view. [#54211](https://github.com/ClickHouse/ClickHouse/pull/54211) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Do not consider data part as broken if projection is broken. Closes [#56593](https://github.com/ClickHouse/ClickHouse/issues/56593). [#56864](https://github.com/ClickHouse/ClickHouse/pull/56864) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add short-circuit ability for `dictGetOrDefault` function. Closes [#52098](https://github.com/ClickHouse/ClickHouse/issues/52098). [#57767](https://github.com/ClickHouse/ClickHouse/pull/57767) ([jsc0218](https://github.com/jsc0218)).
* Running `ALTER COLUMN MATERIALIZE` on a column with `DEFAULT` or `MATERIALIZED` expression now writes the correct values: The default value for existing parts with default value or the non-default value for existing parts with non-default value. Previously, the default value was written for all existing parts. [#58023](https://github.com/ClickHouse/ClickHouse/pull/58023) ([Duc Canh Le](https://github.com/canhld94)).
* Enabled a backoff logic (e.g. exponential). Will provide an ability for reduced CPU usage, memory usage and log file sizes. [#58036](https://github.com/ClickHouse/ClickHouse/pull/58036) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Add improvement to count InitialQuery. [#58195](https://github.com/ClickHouse/ClickHouse/pull/58195) ([Unalian](https://github.com/Unalian)).
* Support negative positional arguments. Closes [#57736](https://github.com/ClickHouse/ClickHouse/issues/57736). [#58292](https://github.com/ClickHouse/ClickHouse/pull/58292) ([flynn](https://github.com/ucasfl)).
* Implement auto-adjustment for asynchronous insert timeouts. The following settings are introduced: async_insert_poll_timeout_ms, async_insert_use_adaptive_busy_timeout, async_insert_busy_timeout_min_ms, async_insert_busy_timeout_max_ms, async_insert_busy_timeout_increase_rate, async_insert_busy_timeout_decrease_rate. [#58486](https://github.com/ClickHouse/ClickHouse/pull/58486) ([Julia Kartseva](https://github.com/jkartseva)).
* Allow to define `volume_priority` in `storage_configuration`. [#58533](https://github.com/ClickHouse/ClickHouse/pull/58533) ([Andrey Zvonov](https://github.com/zvonand)).
* Add support for Date32 type in T64 codec. [#58738](https://github.com/ClickHouse/ClickHouse/pull/58738) ([Hongbin Ma](https://github.com/binmahone)).
* Support `LEFT JOIN`, `ALL INNER JOIN`, and simple subqueries for parallel replicas (only with analyzer). New setting `parallel_replicas_prefer_local_join` chooses local `JOIN` execution (by default) vs `GLOBAL JOIN`. All tables should exist on every replica from `cluster_for_parallel_replicas`. New settings `min_external_table_block_size_rows` and `min_external_table_block_size_bytes` are used to squash small blocks that are sent for temporary tables (only with analyzer). [#58916](https://github.com/ClickHouse/ClickHouse/pull/58916) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Allow trailing commas in types with several items. [#59119](https://github.com/ClickHouse/ClickHouse/pull/59119) ([Aleksandr Musorin](https://github.com/AVMusorin)).
* Allow parallel and distributed processing for `S3Queue` table engine. For distributed processing use setting `s3queue_total_shards_num` (by default `1`). Setting `s3queue_processing_threads_num` previously was not allowed for Ordered processing mode, now it is allowed. Warning: settings `s3queue_processing_threads_num`(processing threads per each shard) and `s3queue_total_shards_num` for ordered mode change how metadata is stored (make the number of `max_processed_file` nodes equal to `s3queue_processing_threads_num * s3queue_total_shards_num`), so they must be the same for all shards and cannot be changed once at least one shard is created. [#59167](https://github.com/ClickHouse/ClickHouse/pull/59167) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Allow concurrent table creation in `DatabaseReplicated` during `recoverLostReplica`. [#59277](https://github.com/ClickHouse/ClickHouse/pull/59277) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Settings for the Distributed table engine can now be specified in the server configuration file (similar to MergeTree settings), e.g. ``` <distributed> <flush_on_detach>false</flush_on_detach> </distributed> ```. [#59291](https://github.com/ClickHouse/ClickHouse/pull/59291) ([Azat Khuzhin](https://github.com/azat)).
* Use MergeTree as a default table engine. It makes the usability much better, and closer to ClickHouse Cloud. [#59316](https://github.com/ClickHouse/ClickHouse/pull/59316) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Retry disconnects and expired sessions when reading `system.zookeeper`. This is helpful when reading many rows from `system.zookeeper` table especially in the presence of fault-injected disconnects. [#59388](https://github.com/ClickHouse/ClickHouse/pull/59388) ([Alexander Gololobov](https://github.com/davenger)).
* Do not interpret numbers with leading zeroes as octals when `input_format_values_interpret_expressions=0`. [#59403](https://github.com/ClickHouse/ClickHouse/pull/59403) ([Joanna Hulboj](https://github.com/jh0x)).
* At startup and whenever config files are changed, ClickHouse updates the hard memory limits of its total memory tracker. These limits are computed based on various server settings and cgroups limits (on Linux). Previously, setting `/sys/fs/cgroup/memory.max` (for cgroups v2) was hard-coded. As a result, cgroup v2 memory limits configured for nested groups (hierarchies), e.g. `/sys/fs/cgroup/my/nested/group/memory.max` were ignored. This is now fixed. The behavior of v1 memory limits remains unchanged. [#59435](https://github.com/ClickHouse/ClickHouse/pull/59435) ([Robert Schulze](https://github.com/rschu1ze)).
* New profile events added to observe the time spent on calculating PK/projections/secondary indices during `INSERT`-s. [#59436](https://github.com/ClickHouse/ClickHouse/pull/59436) ([Nikita Taranov](https://github.com/nickitat)).
* Allow to define a starting point for S3Queue with Ordered mode at creation using setting `s3queue_last_processed_path`. [#59446](https://github.com/ClickHouse/ClickHouse/pull/59446) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Keeper improvement: cache only a certain amount of logs in-memory controlled by `latest_logs_cache_size_threshold` and `commit_logs_cache_size_threshold`. [#59460](https://github.com/ClickHouse/ClickHouse/pull/59460) ([Antonio Andelic](https://github.com/antonio2368)).
* Made comments for system tables also available in `system.tables` in `clickhouse-local`. [#59493](https://github.com/ClickHouse/ClickHouse/pull/59493) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Don't infer floats in exponential notation by default. Add a setting `input_format_try_infer_exponent_floats` that will restore previous behaviour (disabled by default). Closes [#59476](https://github.com/ClickHouse/ClickHouse/issues/59476). [#59500](https://github.com/ClickHouse/ClickHouse/pull/59500) ([Kruglov Pavel](https://github.com/Avogar)).
* Allow alter operations to be surrounded by parenthesis. The emission of parentheses can be controlled by the `format_alter_operations_with_parentheses` config. By default in formatted queries the parentheses are emitted as we store the formatted alter operations in some places as metadata (e.g.: mutations). The new syntax clarifies some of the queries where alter operations end in a list. E.g.: `ALTER TABLE x MODIFY TTL date GROUP BY a, b, DROP COLUMN c` cannot be parsed properly with the old syntax. In the new syntax the query `ALTER TABLE x (MODIFY TTL date GROUP BY a, b), (DROP COLUMN c)` is obvious. Older versions are not able to read the new syntax, therefore using the new syntax might cause issues if newer and older version of ClickHouse are mixed in a single cluster. [#59532](https://github.com/ClickHouse/ClickHouse/pull/59532) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* The previous default value equals to 1 MB for `async_insert_max_data_size` appeared to be too small. The new one would be 10 MiB. [#59536](https://github.com/ClickHouse/ClickHouse/pull/59536) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Previously the whole result was accumulated in memory and returned as one big chunk. This change should help to reduce memory consumption when reading many rows from `system.zookeeper`, allow showing intermediate progress (how many rows have been read so far) and avoid hitting connection timeout when result set is big. [#59545](https://github.com/ClickHouse/ClickHouse/pull/59545) ([Alexander Gololobov](https://github.com/davenger)).
* Now dashboard understands both compressed and uncompressed state of URL's #hash (backward compatibility). Continuation of [#59124](https://github.com/ClickHouse/ClickHouse/issues/59124) . [#59548](https://github.com/ClickHouse/ClickHouse/pull/59548) ([Amos Bird](https://github.com/amosbird)).
* Bumped Intel QPL (used by codec `DEFLATE_QPL`) from v1.3.1 to v1.4.0 . Also fixed a bug for polling timeout mechanism, as we observed in same cases timeout won't work properly, if timeout happen, IAA and CPU may process buffer concurrently. So far, we'd better make sure IAA codec status is not QPL_STS_BEING_PROCESSED, then fallback to SW codec. [#59551](https://github.com/ClickHouse/ClickHouse/pull/59551) ([jasperzhu](https://github.com/jinjunzh)).
* Keeper improvement: reduce size of data node even more. [#59592](https://github.com/ClickHouse/ClickHouse/pull/59592) ([Antonio Andelic](https://github.com/antonio2368)).
* Do not show a warning about the server version in ClickHouse Cloud because ClickHouse Cloud handles seamless upgrades automatically. [#59657](https://github.com/ClickHouse/ClickHouse/pull/59657) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* After self-extraction temporary binary is moved instead copying. [#59661](https://github.com/ClickHouse/ClickHouse/pull/59661) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix stack unwinding on MacOS. This closes [#53653](https://github.com/ClickHouse/ClickHouse/issues/53653). [#59690](https://github.com/ClickHouse/ClickHouse/pull/59690) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Check for stack overflow in parsers even if the user misconfigured the `max_parser_depth` setting to a very high value. This closes [#59622](https://github.com/ClickHouse/ClickHouse/issues/59622). [#59697](https://github.com/ClickHouse/ClickHouse/pull/59697) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Perform synchronous inserts if dependent MV deduplication is enabled through deduplicate_blocks_in_dependent_materialized_views=1. [#59699](https://github.com/ClickHouse/ClickHouse/pull/59699) ([Julia Kartseva](https://github.com/jkartseva)).
* Added settings `split_parts_ranges_into_intersecting_and_non_intersecting_final` and `split_intersecting_parts_ranges_into_layers_final`. This settings are needed to disable optimizations for queries with `FINAL` and needed for debug only. [#59705](https://github.com/ClickHouse/ClickHouse/pull/59705) ([Maksim Kita](https://github.com/kitaisreal)).
* Unify xml and sql created named collection behaviour in kafka storage. [#59710](https://github.com/ClickHouse/ClickHouse/pull/59710) ([Pervakov Grigorii](https://github.com/GrigoryPervakov)).
* In case when `merge_max_block_size_bytes` is small enough and tables contain wide rows (strings or tuples) background merges may stuck in an endless loop. This behaviour is fixed. Follow-up for https://github.com/ClickHouse/ClickHouse/pull/59340. [#59812](https://github.com/ClickHouse/ClickHouse/pull/59812) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Allow uuid in replica_path if CREATE TABLE explicitly has it. [#59908](https://github.com/ClickHouse/ClickHouse/pull/59908) ([Azat Khuzhin](https://github.com/azat)).
* Add column `metadata_version` of ReplicatedMergeTree table in `system.tables` system table. [#59942](https://github.com/ClickHouse/ClickHouse/pull/59942) ([Maksim Kita](https://github.com/kitaisreal)).
* Keeper improvement: send only Keeper related metrics/events for Prometheus. [#59945](https://github.com/ClickHouse/ClickHouse/pull/59945) ([Antonio Andelic](https://github.com/antonio2368)).
* The dashboard will display metrics across different ClickHouse versions even if the structure of system tables has changed after the upgrade. [#59967](https://github.com/ClickHouse/ClickHouse/pull/59967) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow loading AZ info from a file. [#59976](https://github.com/ClickHouse/ClickHouse/pull/59976) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Keeper improvement: add retries on failures for Disk related operations. [#59980](https://github.com/ClickHouse/ClickHouse/pull/59980) ([Antonio Andelic](https://github.com/antonio2368)).
* Add new config setting `backups.remove_backup_files_after_failure`: ``` <clickhouse> <backups> <remove_backup_files_after_failure>true</remove_backup_files_after_failure> </backups> </clickhouse> ```. [#60002](https://github.com/ClickHouse/ClickHouse/pull/60002) ([Vitaly Baranov](https://github.com/vitlibar)).
* Use multiple threads while reading the metadata of tables from a backup while executing the RESTORE command. [#60040](https://github.com/ClickHouse/ClickHouse/pull/60040) ([Vitaly Baranov](https://github.com/vitlibar)).
* Now if `StorageBuffer` has more than 1 shard (`num_layers` > 1) background flush will happen simultaneously for all shards in multiple threads. [#60111](https://github.com/ClickHouse/ClickHouse/pull/60111) ([alesapin](https://github.com/alesapin)).
* Support specifying users for specific S3 settings in config using `user` key. [#60144](https://github.com/ClickHouse/ClickHouse/pull/60144) ([Antonio Andelic](https://github.com/antonio2368)).
* Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
* Implement comparison operator for Variant values and proper Field inserting into Variant column. Don't allow creating `Variant` type with similar variant types by default (allow uder a setting `allow_suspicious_variant_types`) Closes [#59996](https://github.com/ClickHouse/ClickHouse/issues/59996). Closes [#59850](https://github.com/ClickHouse/ClickHouse/issues/59850). [#60198](https://github.com/ClickHouse/ClickHouse/pull/60198) ([Kruglov Pavel](https://github.com/Avogar)).
* Short circuit execution for `ULIDStringToDateTime`. [#60211](https://github.com/ClickHouse/ClickHouse/pull/60211) ([Juan Madurga](https://github.com/jlmadurga)).
* Added `query_id` column for tables `system.backups` and `system.backup_log`. Added error stacktrace to `error` column. [#60220](https://github.com/ClickHouse/ClickHouse/pull/60220) ([Maksim Kita](https://github.com/kitaisreal)).
* Connections through the MySQL port now automatically run with setting `prefer_column_name_to_alias = 1` to support QuickSight out-of-the-box. Also, settings `mysql_map_string_to_text_in_show_columns` and `mysql_map_fixed_string_to_text_in_show_columns` are now enabled by default, affecting also only MySQL connections. This increases compatibility with more BI tools. [#60365](https://github.com/ClickHouse/ClickHouse/pull/60365) ([Robert Schulze](https://github.com/rschu1ze)).
* When output format is Pretty format and a block consists of a single numeric value which exceeds one million, A readable number will be printed on table right. e.g. ``` ┌──────count()─┐ │ 233765663884 │ -- 233.77 billion └──────────────┘ ```. [#60379](https://github.com/ClickHouse/ClickHouse/pull/60379) ([rogeryk](https://github.com/rogeryk)).
* Fix a race condition in JavaScript code leading to duplicate charts on top of each other. [#60392](https://github.com/ClickHouse/ClickHouse/pull/60392) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Check for stack overflow in parsers even if the user misconfigured the `max_parser_depth` setting to a very high value. This closes [#59622](https://github.com/ClickHouse/ClickHouse/issues/59622). [#60434](https://github.com/ClickHouse/ClickHouse/pull/60434) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Build/Testing/Packaging Improvement
* Added builds and tests with coverage collection with introspection. Continuation of [#56102](https://github.com/ClickHouse/ClickHouse/issues/56102). [#58792](https://github.com/ClickHouse/ClickHouse/pull/58792) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Include `pytest-reportlog` in integration test CI runner Dockerfile to enable JSON test reports. [#58926](https://github.com/ClickHouse/ClickHouse/pull/58926) ([MyroTk](https://github.com/MyroTk)).
* Update the rust toolchain in `corrosion-cmake` when the CMake cross-compilation toolchain variable is set. [#59309](https://github.com/ClickHouse/ClickHouse/pull/59309) ([Aris Tritas](https://github.com/aris-aiven)).
* Add some fuzzing to ASTLiterals. [#59383](https://github.com/ClickHouse/ClickHouse/pull/59383) ([Raúl Marín](https://github.com/Algunenano)).
* If you want to run initdb scripts every time when ClickHouse container is starting you shoud initialize environment varible CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
* Remove ability to disable generic clickhouse components (like server/client/...), but keep some that requires extra libraries (like ODBC or keeper). [#59857](https://github.com/ClickHouse/ClickHouse/pull/59857) ([Azat Khuzhin](https://github.com/azat)).
* Query fuzzer will fuzz SETTINGS inside queries. [#60087](https://github.com/ClickHouse/ClickHouse/pull/60087) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add test that validates projections still work after attach partition. [#60415](https://github.com/ClickHouse/ClickHouse/pull/60415) ([Arthur Passos](https://github.com/arthurpassos)).
* Add test that validates attach partition fails if structure differs because of materialized column. [#60418](https://github.com/ClickHouse/ClickHouse/pull/60418) ([Arthur Passos](https://github.com/arthurpassos)).
* Add support for building ClickHouse with clang-19 (master). [#60448](https://github.com/ClickHouse/ClickHouse/pull/60448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Speedup check-whitespaces check. [#60496](https://github.com/ClickHouse/ClickHouse/pull/60496) ([Raúl Marín](https://github.com/Algunenano)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Non ready set in TTL WHERE. [#57430](https://github.com/ClickHouse/ClickHouse/pull/57430) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix quantilesGK bug [#58216](https://github.com/ClickHouse/ClickHouse/pull/58216) ([李扬](https://github.com/taiyang-li)).
* Disable parallel replicas JOIN with CTE (not analyzer) [#59239](https://github.com/ClickHouse/ClickHouse/pull/59239) ([Raúl Marín](https://github.com/Algunenano)).
* Fix bug with `intDiv` for decimal arguments [#59243](https://github.com/ClickHouse/ClickHouse/pull/59243) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix translate() with FixedString input [#59356](https://github.com/ClickHouse/ClickHouse/pull/59356) ([Raúl Marín](https://github.com/Algunenano)).
* Fix digest calculation in Keeper [#59439](https://github.com/ClickHouse/ClickHouse/pull/59439) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix stacktraces for binaries without debug symbols [#59444](https://github.com/ClickHouse/ClickHouse/pull/59444) ([Azat Khuzhin](https://github.com/azat)).
* Fix `ASTAlterCommand::formatImpl` in case of column specific settings… [#59445](https://github.com/ClickHouse/ClickHouse/pull/59445) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix `SELECT * FROM [...] ORDER BY ALL` with Analyzer [#59462](https://github.com/ClickHouse/ClickHouse/pull/59462) ([zhongyuankai](https://github.com/zhongyuankai)).
* Fix possible uncaught exception during distributed query cancellation [#59487](https://github.com/ClickHouse/ClickHouse/pull/59487) ([Azat Khuzhin](https://github.com/azat)).
* Make MAX use the same rules as permutation for complex types [#59498](https://github.com/ClickHouse/ClickHouse/pull/59498) ([Raúl Marín](https://github.com/Algunenano)).
* Fix corner case when passing `update_insert_deduplication_token_in_dependent_materialized_views` [#59544](https://github.com/ClickHouse/ClickHouse/pull/59544) ([Jordi Villar](https://github.com/jrdi)).
* Fix incorrect result of arrayElement / map[] on empty value [#59594](https://github.com/ClickHouse/ClickHouse/pull/59594) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash in topK when merging empty states [#59603](https://github.com/ClickHouse/ClickHouse/pull/59603) ([Raúl Marín](https://github.com/Algunenano)).
* Fix distributed table with a constant sharding key [#59606](https://github.com/ClickHouse/ClickHouse/pull/59606) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix_kql_issue_found_by_wingfuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)).
* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
* Maintain function alias in RewriteSumFunctionWithSumAndCountVisitor [#59658](https://github.com/ClickHouse/ClickHouse/pull/59658) ([Raúl Marín](https://github.com/Algunenano)).
* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)).
* Validate types of arguments for `minmax` skipping index [#59733](https://github.com/ClickHouse/ClickHouse/pull/59733) ([Anton Popov](https://github.com/CurtizJ)).
* Fix leftPad / rightPad function with FixedString input [#59739](https://github.com/ClickHouse/ClickHouse/pull/59739) ([Raúl Marín](https://github.com/Algunenano)).
* Fix AST fuzzer issue in function `countMatches` [#59752](https://github.com/ClickHouse/ClickHouse/pull/59752) ([Robert Schulze](https://github.com/rschu1ze)).
* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix StorageURL doing some of the query execution in single thread [#59833](https://github.com/ClickHouse/ClickHouse/pull/59833) ([Michael Kolupaev](https://github.com/al13n321)).
* s3queue: fix uninitialized value [#59897](https://github.com/ClickHouse/ClickHouse/pull/59897) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix crash in JSONColumnsWithMetadata format over http [#59925](https://github.com/ClickHouse/ClickHouse/pull/59925) ([Kruglov Pavel](https://github.com/Avogar)).
* Do not rewrite sum() to count() if return value differs in analyzer [#59926](https://github.com/ClickHouse/ClickHouse/pull/59926) ([Azat Khuzhin](https://github.com/azat)).
* UniqExactSet read crash fix [#59928](https://github.com/ClickHouse/ClickHouse/pull/59928) ([Maksim Kita](https://github.com/kitaisreal)).
* ReplicatedMergeTree invalid metadata_version fix [#59946](https://github.com/ClickHouse/ClickHouse/pull/59946) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix data race in `StorageDistributed` [#59987](https://github.com/ClickHouse/ClickHouse/pull/59987) ([Nikita Taranov](https://github.com/nickitat)).
* Run init scripts when option is enabled rather than disabled [#59991](https://github.com/ClickHouse/ClickHouse/pull/59991) ([jktng](https://github.com/jktng)).
* Fix scale conversion for DateTime64 [#60004](https://github.com/ClickHouse/ClickHouse/pull/60004) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix INSERT into SQLite with single quote (by escaping single quotes with a quote instead of backslash) [#60015](https://github.com/ClickHouse/ClickHouse/pull/60015) ([Azat Khuzhin](https://github.com/azat)).
* Fix several logical errors in arrayFold [#60022](https://github.com/ClickHouse/ClickHouse/pull/60022) ([Raúl Marín](https://github.com/Algunenano)).
* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)).
* Fix possible exception from s3queue table on drop [#60036](https://github.com/ClickHouse/ClickHouse/pull/60036) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix formatting of NOT with single literals [#60042](https://github.com/ClickHouse/ClickHouse/pull/60042) ([Raúl Marín](https://github.com/Algunenano)).
* Use max_query_size from context in DDLLogEntry instead of hardcoded 4096 [#60083](https://github.com/ClickHouse/ClickHouse/pull/60083) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix inconsistent formatting of queries [#60095](https://github.com/ClickHouse/ClickHouse/pull/60095) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix inconsistent formatting of explain in subqueries [#60102](https://github.com/ClickHouse/ClickHouse/pull/60102) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
* Allow casting of bools in string representation to to true bools [#60160](https://github.com/ClickHouse/ClickHouse/pull/60160) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix system.s3queue_log [#60166](https://github.com/ClickHouse/ClickHouse/pull/60166) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix arrayReduce with nullable aggregate function name [#60188](https://github.com/ClickHouse/ClickHouse/pull/60188) ([Raúl Marín](https://github.com/Algunenano)).
* Fix actions execution during preliminary filtering (PK, partition pruning) [#60196](https://github.com/ClickHouse/ClickHouse/pull/60196) ([Azat Khuzhin](https://github.com/azat)).
* Hide sensitive info for s3queue [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Revert "Replace `ORDER BY ALL` by `ORDER BY *`" [#60248](https://github.com/ClickHouse/ClickHouse/pull/60248) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix http exception codes. [#60252](https://github.com/ClickHouse/ClickHouse/pull/60252) ([Austin Kothig](https://github.com/kothiga)).
* s3queue: fix bug (also fixes flaky test_storage_s3_queue/test.py::test_shards_distributed) [#60282](https://github.com/ClickHouse/ClickHouse/pull/60282) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix use-of-uninitialized-value and invalid result in hashing functions with IPv6 [#60359](https://github.com/ClickHouse/ClickHouse/pull/60359) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix OptimizeDateOrDateTimeConverterWithPreimageVisitor with null arguments [#60453](https://github.com/ClickHouse/ClickHouse/pull/60453) ([Raúl Marín](https://github.com/Algunenano)).
* Merging [#59674](https://github.com/ClickHouse/ClickHouse/issues/59674). [#60470](https://github.com/ClickHouse/ClickHouse/pull/60470) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Correctly check keys in s3Cluster [#60477](https://github.com/ClickHouse/ClickHouse/pull/60477) ([Antonio Andelic](https://github.com/antonio2368)).
#### CI Fix or Improvement (changelog entry is not required)
* ... [#60457](https://github.com/ClickHouse/ClickHouse/pull/60457) ([Max K.](https://github.com/maxknv)).
* ... [#60512](https://github.com/ClickHouse/ClickHouse/pull/60512) ([Max K.](https://github.com/maxknv)).
* Arm and amd docker build jobs use similar job names and thus overwrite job reports - aarch64 and amd64 suffixes added to fix this. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)).
* ... [#60557](https://github.com/ClickHouse/ClickHouse/pull/60557) ([Max K.](https://github.com/maxknv)).
* BUG: build job can report success cache record on failed build Add a check relying on job report fail. [#60587](https://github.com/ClickHouse/ClickHouse/pull/60587) ([Max K.](https://github.com/maxknv)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Revert "Add new aggregation function groupArraySorted()""'. [#59003](https://github.com/ClickHouse/ClickHouse/pull/59003) ([Maksim Kita](https://github.com/kitaisreal)).
* NO CL ENTRY: 'Revert "Update libxml2 version to address some bogus security issues"'. [#59479](https://github.com/ClickHouse/ClickHouse/pull/59479) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Poco Logger small refactoring"'. [#59509](https://github.com/ClickHouse/ClickHouse/pull/59509) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Revert "Poco Logger small refactoring""'. [#59564](https://github.com/ClickHouse/ClickHouse/pull/59564) ([Maksim Kita](https://github.com/kitaisreal)).
* NO CL ENTRY: 'Revert "MergeTree FINAL optimization diagnostics and settings"'. [#59702](https://github.com/ClickHouse/ClickHouse/pull/59702) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Use `MergeTree` as a default table engine"'. [#59711](https://github.com/ClickHouse/ClickHouse/pull/59711) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Rename a setting"'. [#59754](https://github.com/ClickHouse/ClickHouse/pull/59754) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Less error prone interface of read buffers"'. [#59911](https://github.com/ClickHouse/ClickHouse/pull/59911) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* NO CL ENTRY: 'Revert "Update version_date.tsv and changelogs after v24.1.4.19-stable"'. [#59973](https://github.com/ClickHouse/ClickHouse/pull/59973) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* NO CL ENTRY: 'Revert "ReplicatedMergeTree invalid metadata_version fix"'. [#60058](https://github.com/ClickHouse/ClickHouse/pull/60058) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Revert "ReplicatedMergeTree invalid metadata_version fix""'. [#60078](https://github.com/ClickHouse/ClickHouse/pull/60078) ([Maksim Kita](https://github.com/kitaisreal)).
* NO CL ENTRY: 'Revert "Implement system.dns_cache table"'. [#60085](https://github.com/ClickHouse/ClickHouse/pull/60085) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Restriction for the access key id for s3."'. [#60181](https://github.com/ClickHouse/ClickHouse/pull/60181) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Do not retry queries if container is down in integration tests"'. [#60215](https://github.com/ClickHouse/ClickHouse/pull/60215) ([Antonio Andelic](https://github.com/antonio2368)).
* NO CL ENTRY: 'Revert "Check stack size in Parser"'. [#60216](https://github.com/ClickHouse/ClickHouse/pull/60216) ([Antonio Andelic](https://github.com/antonio2368)).
* NO CL ENTRY: 'Revert "Support resource request canceling"'. [#60253](https://github.com/ClickHouse/ClickHouse/pull/60253) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Revert "Add definers for views"'. [#60350](https://github.com/ClickHouse/ClickHouse/pull/60350) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Update build-osx.md'. [#60380](https://github.com/ClickHouse/ClickHouse/pull/60380) ([rogeryk](https://github.com/rogeryk)).
* NO CL ENTRY: 'Revert "Fix: IAST::clone() for RENAME"'. [#60398](https://github.com/ClickHouse/ClickHouse/pull/60398) ([Antonio Andelic](https://github.com/antonio2368)).
* NO CL ENTRY: 'Revert "Add table function `mergeTreeIndex`"'. [#60428](https://github.com/ClickHouse/ClickHouse/pull/60428) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Userspace page cache"'. [#60550](https://github.com/ClickHouse/ClickHouse/pull/60550) ([Alexander Tokmakov](https://github.com/tavplubix)).
* NO CL ENTRY: 'Revert "Analyzer: compute ALIAS columns right after reading"'. [#60570](https://github.com/ClickHouse/ClickHouse/pull/60570) ([Alexander Tokmakov](https://github.com/tavplubix)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Analyzer: support aliases and distributed JOINs in StorageMerge [#50894](https://github.com/ClickHouse/ClickHouse/pull/50894) ([Dmitry Novik](https://github.com/novikd)).
* Userspace page cache [#53770](https://github.com/ClickHouse/ClickHouse/pull/53770) ([Michael Kolupaev](https://github.com/al13n321)).
* Simplify optimize-push-to-prewhere from query plan [#58554](https://github.com/ClickHouse/ClickHouse/pull/58554) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Create ch/chc/chl symlinks by cmake as well (for develop mode) [#58609](https://github.com/ClickHouse/ClickHouse/pull/58609) ([Azat Khuzhin](https://github.com/azat)).
* CI: ci cache. step 1 [#58664](https://github.com/ClickHouse/ClickHouse/pull/58664) ([Max K.](https://github.com/maxknv)).
* Enable building JIT with UBSAN [#58952](https://github.com/ClickHouse/ClickHouse/pull/58952) ([Raúl Marín](https://github.com/Algunenano)).
* Support resource request canceling [#59032](https://github.com/ClickHouse/ClickHouse/pull/59032) ([Sergei Trifonov](https://github.com/serxa)).
* Analyzer: Do not resolve remote table id on initiator [#59073](https://github.com/ClickHouse/ClickHouse/pull/59073) ([Dmitry Novik](https://github.com/novikd)).
* Analyzer: Add cast for ConstantNode from constant folding [#59121](https://github.com/ClickHouse/ClickHouse/pull/59121) ([Dmitry Novik](https://github.com/novikd)).
* Fix the default value of `async_insert_max_data_size` in EN document [#59161](https://github.com/ClickHouse/ClickHouse/pull/59161) ([Alex Cheng](https://github.com/Alex-Cheng)).
* CI: Add ARM integration tests [#59241](https://github.com/ClickHouse/ClickHouse/pull/59241) ([Max K.](https://github.com/maxknv)).
* Fix getting filename from read buffer wrappers [#59298](https://github.com/ClickHouse/ClickHouse/pull/59298) ([Kruglov Pavel](https://github.com/Avogar)).
* Update AWS SDK to 1.11.234 [#59299](https://github.com/ClickHouse/ClickHouse/pull/59299) ([Nikita Taranov](https://github.com/nickitat)).
* Split `ISlotControl` from `ConcurrencyControl` [#59313](https://github.com/ClickHouse/ClickHouse/pull/59313) ([Sergei Trifonov](https://github.com/serxa)).
* Some small fixes for docker images [#59337](https://github.com/ClickHouse/ClickHouse/pull/59337) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* CI: bugfix-validate, integration, functional test scripts updates [#59348](https://github.com/ClickHouse/ClickHouse/pull/59348) ([Max K.](https://github.com/maxknv)).
* MaterializedMySQL: Fix gtid_after_attach_test to retry on detach [#59370](https://github.com/ClickHouse/ClickHouse/pull/59370) ([Val Doroshchuk](https://github.com/valbok)).
* Poco Logger small refactoring [#59375](https://github.com/ClickHouse/ClickHouse/pull/59375) ([Maksim Kita](https://github.com/kitaisreal)).
* Add sanity checks for function return types [#59379](https://github.com/ClickHouse/ClickHouse/pull/59379) ([Raúl Marín](https://github.com/Algunenano)).
* Cleanup connection pool surroundings [#59380](https://github.com/ClickHouse/ClickHouse/pull/59380) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix ARRAY JOIN with subcolumns [#59382](https://github.com/ClickHouse/ClickHouse/pull/59382) ([vdimir](https://github.com/vdimir)).
* Update curl submodule to be version 8.50 to address the irrelevant CVE-2023-46218 and CVE-2023-49219, which we don't care about at all. [#59384](https://github.com/ClickHouse/ClickHouse/pull/59384) ([josh-hildred](https://github.com/josh-hildred)).
* Update libxml2 version to address some bogus security issues [#59386](https://github.com/ClickHouse/ClickHouse/pull/59386) ([josh-hildred](https://github.com/josh-hildred)).
* Update version after release [#59393](https://github.com/ClickHouse/ClickHouse/pull/59393) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Job names [#59395](https://github.com/ClickHouse/ClickHouse/pull/59395) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* CI: fix status and report for docker server jobs [#59396](https://github.com/ClickHouse/ClickHouse/pull/59396) ([Max K.](https://github.com/maxknv)).
* Update version_date.tsv and changelogs after v24.1.1.2048-stable [#59397](https://github.com/ClickHouse/ClickHouse/pull/59397) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Forward declaration for PeekableReadBuffer [#59399](https://github.com/ClickHouse/ClickHouse/pull/59399) ([Azat Khuzhin](https://github.com/azat)).
* Progress bar: use FQDN to differentiate metrics from different hosts [#59404](https://github.com/ClickHouse/ClickHouse/pull/59404) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix test test_stop_other_host_during_backup [#59432](https://github.com/ClickHouse/ClickHouse/pull/59432) ([Vitaly Baranov](https://github.com/vitlibar)).
* Update run.sh [#59433](https://github.com/ClickHouse/ClickHouse/pull/59433) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Post a failure status if can not run the CI [#59440](https://github.com/ClickHouse/ClickHouse/pull/59440) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Safer Rust (catch panic with catch_unwind()) [#59447](https://github.com/ClickHouse/ClickHouse/pull/59447) ([Azat Khuzhin](https://github.com/azat)).
* More parallel insert-select pipeline [#59448](https://github.com/ClickHouse/ClickHouse/pull/59448) ([Nikita Taranov](https://github.com/nickitat)).
* CLion says these headers are unused [#59451](https://github.com/ClickHouse/ClickHouse/pull/59451) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix 02720_row_policy_column_with_dots [#59453](https://github.com/ClickHouse/ClickHouse/pull/59453) ([Duc Canh Le](https://github.com/canhld94)).
* Fix problem detected by UBSAN [#59461](https://github.com/ClickHouse/ClickHouse/pull/59461) ([Raúl Marín](https://github.com/Algunenano)).
* Analyzer: Fix denny_crane [#59483](https://github.com/ClickHouse/ClickHouse/pull/59483) ([vdimir](https://github.com/vdimir)).
* Fix `00191_aggregating_merge_tree_and_final` [#59494](https://github.com/ClickHouse/ClickHouse/pull/59494) ([Nikita Taranov](https://github.com/nickitat)).
* Avoid running all checks when `aspell-dict.txt` was changed [#59496](https://github.com/ClickHouse/ClickHouse/pull/59496) ([Aleksandr Musorin](https://github.com/AVMusorin)).
* Fixes for binary.html [#59499](https://github.com/ClickHouse/ClickHouse/pull/59499) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Parallel replicas: better initial replicas failover (2) [#59501](https://github.com/ClickHouse/ClickHouse/pull/59501) ([Igor Nikonov](https://github.com/devcrafter)).
* Update version_date.tsv and changelogs after v24.1.2.5-stable [#59510](https://github.com/ClickHouse/ClickHouse/pull/59510) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.12.3.40-stable [#59511](https://github.com/ClickHouse/ClickHouse/pull/59511) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.11.5.29-stable [#59515](https://github.com/ClickHouse/ClickHouse/pull/59515) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update memory tracker periodically with cgroup memory usage [#59516](https://github.com/ClickHouse/ClickHouse/pull/59516) ([Robert Schulze](https://github.com/rschu1ze)).
* Remove a scary message if an error is retryable [#59517](https://github.com/ClickHouse/ClickHouse/pull/59517) ([alesapin](https://github.com/alesapin)).
* Update the peter-evans/create-pull-request action to v6 [#59520](https://github.com/ClickHouse/ClickHouse/pull/59520) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix usage of StatusType [#59527](https://github.com/ClickHouse/ClickHouse/pull/59527) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Analyzer fix test_select_access_rights/test_main.py::test_select_count [#59528](https://github.com/ClickHouse/ClickHouse/pull/59528) ([vdimir](https://github.com/vdimir)).
* GRPCServer: do not call value() on empty optional query_info [#59533](https://github.com/ClickHouse/ClickHouse/pull/59533) ([Sema Checherinda](https://github.com/CheSema)).
* Use ConnectionPoolPtr instead of raw pointer [#59534](https://github.com/ClickHouse/ClickHouse/pull/59534) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix data race with `AggregatedDataVariants` [#59537](https://github.com/ClickHouse/ClickHouse/pull/59537) ([Nikita Taranov](https://github.com/nickitat)).
* Refactoring of dashboard state encoding [#59554](https://github.com/ClickHouse/ClickHouse/pull/59554) ([Sergei Trifonov](https://github.com/serxa)).
* CI: ci_cache, enable await [#59555](https://github.com/ClickHouse/ClickHouse/pull/59555) ([Max K.](https://github.com/maxknv)).
* Bump libssh to 0.9.8 [#59563](https://github.com/ClickHouse/ClickHouse/pull/59563) ([Robert Schulze](https://github.com/rschu1ze)).
* MultiVersion use mutex [#59565](https://github.com/ClickHouse/ClickHouse/pull/59565) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix aws submodule reference [#59566](https://github.com/ClickHouse/ClickHouse/pull/59566) ([Raúl Marín](https://github.com/Algunenano)).
* Add missed #include <bit> and <exception> [#59567](https://github.com/ClickHouse/ClickHouse/pull/59567) ([Mikhnenko Sasha](https://github.com/4JustMe4)).
* CI: nightly job to update latest docker tag only [#59586](https://github.com/ClickHouse/ClickHouse/pull/59586) ([Max K.](https://github.com/maxknv)).
* Analyzer: compute ALIAS columns right after reading [#59595](https://github.com/ClickHouse/ClickHouse/pull/59595) ([vdimir](https://github.com/vdimir)).
* Add another sanity check for function return types [#59605](https://github.com/ClickHouse/ClickHouse/pull/59605) ([Raúl Marín](https://github.com/Algunenano)).
* Update README.md [#59610](https://github.com/ClickHouse/ClickHouse/pull/59610) ([Tyler Hannan](https://github.com/tylerhannan)).
* Updated a list of trusted contributors [#59616](https://github.com/ClickHouse/ClickHouse/pull/59616) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* CI: fix ast fuzzer job report (slack bot issue) [#59629](https://github.com/ClickHouse/ClickHouse/pull/59629) ([Max K.](https://github.com/maxknv)).
* MergeTree FINAL optimization diagnostics and settings [#59650](https://github.com/ClickHouse/ClickHouse/pull/59650) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix default path when path is not specified in config [#59654](https://github.com/ClickHouse/ClickHouse/pull/59654) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Follow up for [#59277](https://github.com/ClickHouse/ClickHouse/issues/59277) [#59659](https://github.com/ClickHouse/ClickHouse/pull/59659) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Pin python dependencies in stateless tests [#59663](https://github.com/ClickHouse/ClickHouse/pull/59663) ([Raúl Marín](https://github.com/Algunenano)).
* Unquote FLAG_LATEST to fix issue with empty argument [#59672](https://github.com/ClickHouse/ClickHouse/pull/59672) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Temporarily remove a feature that doesn't work [#59688](https://github.com/ClickHouse/ClickHouse/pull/59688) ([Alexander Tokmakov](https://github.com/tavplubix)).
* ConnectionEstablisher: remove unused is_finished [#59706](https://github.com/ClickHouse/ClickHouse/pull/59706) ([Igor Nikonov](https://github.com/devcrafter)).
* Add test for increase-always autoscaling lambda [#59709](https://github.com/ClickHouse/ClickHouse/pull/59709) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Remove SourceWithKeyCondition from ReadFromStorageStep [#59720](https://github.com/ClickHouse/ClickHouse/pull/59720) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add assertions around FixedString code [#59737](https://github.com/ClickHouse/ClickHouse/pull/59737) ([Raúl Marín](https://github.com/Algunenano)).
* Fix skipping unused shards with analyzer [#59741](https://github.com/ClickHouse/ClickHouse/pull/59741) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix DB type check - now it'll refuse to create in Replicated databases [#59743](https://github.com/ClickHouse/ClickHouse/pull/59743) ([Michael Kolupaev](https://github.com/al13n321)).
* Analyzer: Fix test_replicating_constants/test.py::test_different_versions [#59750](https://github.com/ClickHouse/ClickHouse/pull/59750) ([Dmitry Novik](https://github.com/novikd)).
* Fix dashboard params default values [#59753](https://github.com/ClickHouse/ClickHouse/pull/59753) ([Sergei Trifonov](https://github.com/serxa)).
* Fix logical optimizer with LowCardinality in new analyzer [#59766](https://github.com/ClickHouse/ClickHouse/pull/59766) ([Antonio Andelic](https://github.com/antonio2368)).
* Update libuv [#59773](https://github.com/ClickHouse/ClickHouse/pull/59773) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Followup [#50894](https://github.com/ClickHouse/ClickHouse/issues/50894) [#59774](https://github.com/ClickHouse/ClickHouse/pull/59774) ([Dmitry Novik](https://github.com/novikd)).
* CI: ci test await [#59778](https://github.com/ClickHouse/ClickHouse/pull/59778) ([Max K.](https://github.com/maxknv)).
* Better logging for adaptive async timeouts [#59781](https://github.com/ClickHouse/ClickHouse/pull/59781) ([Julia Kartseva](https://github.com/jkartseva)).
* Fix broken youtube embedding in ne-tormozit.md [#59782](https://github.com/ClickHouse/ClickHouse/pull/59782) ([Shaun Struwig](https://github.com/Blargian)).
* Hide URL/S3 'headers' argument in SHOW CREATE [#59787](https://github.com/ClickHouse/ClickHouse/pull/59787) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix special build reports in release branches [#59797](https://github.com/ClickHouse/ClickHouse/pull/59797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* CI: do not reuse builds on release branches [#59798](https://github.com/ClickHouse/ClickHouse/pull/59798) ([Max K.](https://github.com/maxknv)).
* Update version_date.tsv and changelogs after v24.1.3.31-stable [#59799](https://github.com/ClickHouse/ClickHouse/pull/59799) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v23.12.4.15-stable [#59800](https://github.com/ClickHouse/ClickHouse/pull/59800) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Analyzer: fix test_access_for_functions/test.py::test_access_rights_for_function [#59801](https://github.com/ClickHouse/ClickHouse/pull/59801) ([Dmitry Novik](https://github.com/novikd)).
* Analyzer: Fix test_wrong_db_or_table_name/test.py::test_wrong_table_name [#59806](https://github.com/ClickHouse/ClickHouse/pull/59806) ([Dmitry Novik](https://github.com/novikd)).
* CI: await tune ups [#59807](https://github.com/ClickHouse/ClickHouse/pull/59807) ([Max K.](https://github.com/maxknv)).
* Enforce tests with enabled analyzer in CI [#59814](https://github.com/ClickHouse/ClickHouse/pull/59814) ([Dmitry Novik](https://github.com/novikd)).
* Handle different timestamp related aspects of zip-files [#59815](https://github.com/ClickHouse/ClickHouse/pull/59815) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix settings history azure_max_single_part_copy_size [#59819](https://github.com/ClickHouse/ClickHouse/pull/59819) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Updated a list of trusted contributors [#59844](https://github.com/ClickHouse/ClickHouse/pull/59844) ([Maksim Kita](https://github.com/kitaisreal)).
* Add check for recursiveRemoveLowCardinality() [#59845](https://github.com/ClickHouse/ClickHouse/pull/59845) ([Vitaly Baranov](https://github.com/vitlibar)).
* Better warning for disabled kernel.task_delayacct [#59846](https://github.com/ClickHouse/ClickHouse/pull/59846) ([Azat Khuzhin](https://github.com/azat)).
* Reintroduce 02590_interserver_mode_client_info_initial_query_start_time [#59851](https://github.com/ClickHouse/ClickHouse/pull/59851) ([Azat Khuzhin](https://github.com/azat)).
* Respect CMAKE_OSX_DEPLOYMENT_TARGET for Rust targets [#59852](https://github.com/ClickHouse/ClickHouse/pull/59852) ([Azat Khuzhin](https://github.com/azat)).
* Do not reinitialize ZooKeeperWithFaultInjection on each chunk [#59854](https://github.com/ClickHouse/ClickHouse/pull/59854) ([Alexander Gololobov](https://github.com/davenger)).
* Fix: check if std::function is set before calling it [#59858](https://github.com/ClickHouse/ClickHouse/pull/59858) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix long shutdown of FileLog storage [#59873](https://github.com/ClickHouse/ClickHouse/pull/59873) ([Azat Khuzhin](https://github.com/azat)).
* tests: fix 02322_sql_insert_format flakiness [#59874](https://github.com/ClickHouse/ClickHouse/pull/59874) ([Azat Khuzhin](https://github.com/azat)).
* Follow up for [#58554](https://github.com/ClickHouse/ClickHouse/issues/58554). Cleanup. [#59889](https://github.com/ClickHouse/ClickHouse/pull/59889) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* CI: Fix job failures due to jepsen artifacts [#59890](https://github.com/ClickHouse/ClickHouse/pull/59890) ([Max K.](https://github.com/maxknv)).
* Add test 02988_join_using_prewhere_pushdown [#59892](https://github.com/ClickHouse/ClickHouse/pull/59892) ([vdimir](https://github.com/vdimir)).
* Do not pull mutations if pulling replication log had been stopped [#59895](https://github.com/ClickHouse/ClickHouse/pull/59895) ([Azat Khuzhin](https://github.com/azat)).
* Fix `02982_comments_in_system_tables` [#59896](https://github.com/ClickHouse/ClickHouse/pull/59896) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Refactor Squashing for inserts. [#59899](https://github.com/ClickHouse/ClickHouse/pull/59899) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Do not rebuild a lambda package if it is updated [#59902](https://github.com/ClickHouse/ClickHouse/pull/59902) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix self-extracting: macOS doesn't allow to run renamed executable - copy instead [#59906](https://github.com/ClickHouse/ClickHouse/pull/59906) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Update tests with indexHint for analyzer. [#59907](https://github.com/ClickHouse/ClickHouse/pull/59907) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Petite cleanup around macros and ReplicatedMergeTree [#59909](https://github.com/ClickHouse/ClickHouse/pull/59909) ([Azat Khuzhin](https://github.com/azat)).
* Fix: absence of closing record in query_log for failed insert over http [#59910](https://github.com/ClickHouse/ClickHouse/pull/59910) ([Igor Nikonov](https://github.com/devcrafter)).
* Decrease logging level for http retriable errors to Warning (and fix 00157_cache_dictionary flakiness) [#59920](https://github.com/ClickHouse/ClickHouse/pull/59920) ([Azat Khuzhin](https://github.com/azat)).
* Remove `test_distributed_backward_compatability` [#59921](https://github.com/ClickHouse/ClickHouse/pull/59921) ([Dmitry Novik](https://github.com/novikd)).
* Commands node args should add rvalue to push_back to reduce object copy cost [#59922](https://github.com/ClickHouse/ClickHouse/pull/59922) ([xuzifu666](https://github.com/xuzifu666)).
* tests: fix 02981_vertical_merges_memory_usage flakiness [#59923](https://github.com/ClickHouse/ClickHouse/pull/59923) ([Azat Khuzhin](https://github.com/azat)).
* Analyzer: Update broken integration tests list [#59924](https://github.com/ClickHouse/ClickHouse/pull/59924) ([Dmitry Novik](https://github.com/novikd)).
* CI: integration tests to mysql80 [#59939](https://github.com/ClickHouse/ClickHouse/pull/59939) ([Max K.](https://github.com/maxknv)).
* Register StorageMergeTree exception message fix [#59941](https://github.com/ClickHouse/ClickHouse/pull/59941) ([Maksim Kita](https://github.com/kitaisreal)).
* Replace lambdas with pointers to members to simplify stacks [#59944](https://github.com/ClickHouse/ClickHouse/pull/59944) ([Alexander Gololobov](https://github.com/davenger)).
* Analyzer: Fix test_user_defined_object_persistence [#59948](https://github.com/ClickHouse/ClickHouse/pull/59948) ([Dmitry Novik](https://github.com/novikd)).
* Analyzer: Fix test_mutations_with_merge_tree [#59951](https://github.com/ClickHouse/ClickHouse/pull/59951) ([Dmitry Novik](https://github.com/novikd)).
* Cleanups [#59964](https://github.com/ClickHouse/ClickHouse/pull/59964) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update version_date.tsv and changelogs after v24.1.4.19-stable [#59966](https://github.com/ClickHouse/ClickHouse/pull/59966) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Less conflicts [#59968](https://github.com/ClickHouse/ClickHouse/pull/59968) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* We don't have external dictionaries from Aerospike [#59969](https://github.com/ClickHouse/ClickHouse/pull/59969) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix max num to warn message [#59972](https://github.com/ClickHouse/ClickHouse/pull/59972) ([Jordi Villar](https://github.com/jrdi)).
* Analyzer: Fix test_settings_profile [#59975](https://github.com/ClickHouse/ClickHouse/pull/59975) ([Dmitry Novik](https://github.com/novikd)).
* Update version_date.tsv and changelogs after v24.1.4.20-stable [#59978](https://github.com/ClickHouse/ClickHouse/pull/59978) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Analyzer: Fix test_storage_rabbitmq [#59981](https://github.com/ClickHouse/ClickHouse/pull/59981) ([Dmitry Novik](https://github.com/novikd)).
* Analyzer: Fix test_shard_level_const_function [#59983](https://github.com/ClickHouse/ClickHouse/pull/59983) ([Dmitry Novik](https://github.com/novikd)).
* Add newlines to SettingsChangesHistory to maybe have less conflicts [#59984](https://github.com/ClickHouse/ClickHouse/pull/59984) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Remove context from comparison functions. [#59985](https://github.com/ClickHouse/ClickHouse/pull/59985) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Update version_date.tsv and changelogs after v24.1.5.6-stable [#59993](https://github.com/ClickHouse/ClickHouse/pull/59993) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix mark release ready [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Revert "Insert synchronously if dependent MV deduplication is enabled" [#59998](https://github.com/ClickHouse/ClickHouse/pull/59998) ([Julia Kartseva](https://github.com/jkartseva)).
* Fix obviously wrong (but non significant) error in dictionaries [#60005](https://github.com/ClickHouse/ClickHouse/pull/60005) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Inhibit randomization in some tests [#60009](https://github.com/ClickHouse/ClickHouse/pull/60009) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* The code should not be complex [#60010](https://github.com/ClickHouse/ClickHouse/pull/60010) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Exclude test run from a slow build [#60011](https://github.com/ClickHouse/ClickHouse/pull/60011) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix broken lambdas formatting [#60012](https://github.com/ClickHouse/ClickHouse/pull/60012) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Verify formatting consistency on the server-side [#60013](https://github.com/ClickHouse/ClickHouse/pull/60013) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Analyzer: Fix test_sql_user_defined_functions_on_cluster [#60019](https://github.com/ClickHouse/ClickHouse/pull/60019) ([Dmitry Novik](https://github.com/novikd)).
* Fix 02981_vertical_merges_memory_usage with SharedMergeTree [#60028](https://github.com/ClickHouse/ClickHouse/pull/60028) ([Raúl Marín](https://github.com/Algunenano)).
* Fix 01656_test_query_log_factories_info with analyzer. [#60037](https://github.com/ClickHouse/ClickHouse/pull/60037) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Ability to detect undead ZooKeeper sessions [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Disable tests with coverage [#60047](https://github.com/ClickHouse/ClickHouse/pull/60047) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Print CPU flags at startup [#60075](https://github.com/ClickHouse/ClickHouse/pull/60075) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Cleanup: less confusion between config priority and balancing priority in connection pools [#60077](https://github.com/ClickHouse/ClickHouse/pull/60077) ([Igor Nikonov](https://github.com/devcrafter)).
* Temporary table already exists exception message fix [#60080](https://github.com/ClickHouse/ClickHouse/pull/60080) ([Maksim Kita](https://github.com/kitaisreal)).
* Refactor prewhere and primary key optimization [#60082](https://github.com/ClickHouse/ClickHouse/pull/60082) ([Amos Bird](https://github.com/amosbird)).
* Bump curl to version 4.6.0 [#60084](https://github.com/ClickHouse/ClickHouse/pull/60084) ([josh-hildred](https://github.com/josh-hildred)).
* Check wrong abbreviations [#60086](https://github.com/ClickHouse/ClickHouse/pull/60086) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove the check for formatting consistency from the Fuzzer [#60088](https://github.com/ClickHouse/ClickHouse/pull/60088) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Avoid overflow in settings [#60089](https://github.com/ClickHouse/ClickHouse/pull/60089) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* A small preparation for better handling of primary key in memory [#60092](https://github.com/ClickHouse/ClickHouse/pull/60092) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Move threadPoolCallbackRunner to the "Common" folder [#60097](https://github.com/ClickHouse/ClickHouse/pull/60097) ([Vitaly Baranov](https://github.com/vitlibar)).
* Speed up the CI [#60106](https://github.com/ClickHouse/ClickHouse/pull/60106) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Insignificant changes [#60108](https://github.com/ClickHouse/ClickHouse/pull/60108) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Do not retry queries if container is down in integration tests [#60109](https://github.com/ClickHouse/ClickHouse/pull/60109) ([Azat Khuzhin](https://github.com/azat)).
* Better check for inconsistent formatting [#60110](https://github.com/ClickHouse/ClickHouse/pull/60110) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* skip printing meaningless log [#60123](https://github.com/ClickHouse/ClickHouse/pull/60123) ([conic](https://github.com/conicl)).
* Implement TODO [#60124](https://github.com/ClickHouse/ClickHouse/pull/60124) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix bad log message [#60125](https://github.com/ClickHouse/ClickHouse/pull/60125) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix data race in `IMergeTreeDataPart` [#60139](https://github.com/ClickHouse/ClickHouse/pull/60139) ([Antonio Andelic](https://github.com/antonio2368)).
* Add new setting to changes history [#60141](https://github.com/ClickHouse/ClickHouse/pull/60141) ([Antonio Andelic](https://github.com/antonio2368)).
* Analyzer: fix row level filters with PREWHERE + additional filters [#60142](https://github.com/ClickHouse/ClickHouse/pull/60142) ([vdimir](https://github.com/vdimir)).
* Tests: query log for inserts over http [#60143](https://github.com/ClickHouse/ClickHouse/pull/60143) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix build in master [#60151](https://github.com/ClickHouse/ClickHouse/pull/60151) ([Raúl Marín](https://github.com/Algunenano)).
* Add setting history check to stateless tests [#60154](https://github.com/ClickHouse/ClickHouse/pull/60154) ([Raúl Marín](https://github.com/Algunenano)).
* Mini cleanup of CPUID.h [#60155](https://github.com/ClickHouse/ClickHouse/pull/60155) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix: custom key failover test flakiness [#60158](https://github.com/ClickHouse/ClickHouse/pull/60158) ([Igor Nikonov](https://github.com/devcrafter)).
* Skip sanity checks on secondary CREATE query [#60159](https://github.com/ClickHouse/ClickHouse/pull/60159) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Remove extensively aggressive check [#60162](https://github.com/ClickHouse/ClickHouse/pull/60162) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix wrong message during compilation [#60178](https://github.com/ClickHouse/ClickHouse/pull/60178) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add a test for [#44318](https://github.com/ClickHouse/ClickHouse/issues/44318) [#60179](https://github.com/ClickHouse/ClickHouse/pull/60179) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add test for 59437 [#60191](https://github.com/ClickHouse/ClickHouse/pull/60191) ([Raúl Marín](https://github.com/Algunenano)).
* CI: hot fix for gh statuses [#60201](https://github.com/ClickHouse/ClickHouse/pull/60201) ([Max K.](https://github.com/maxknv)).
* Limit libarchive format to what we use [#60203](https://github.com/ClickHouse/ClickHouse/pull/60203) ([San](https://github.com/santrancisco)).
* Fix bucket region discovery [#60204](https://github.com/ClickHouse/ClickHouse/pull/60204) ([Nikita Taranov](https://github.com/nickitat)).
* Fix `test_backup_restore_s3/test.py::test_user_specific_auth` [#60210](https://github.com/ClickHouse/ClickHouse/pull/60210) ([Antonio Andelic](https://github.com/antonio2368)).
* CI: combine analyzer, s3, dbreplicated into one job [#60224](https://github.com/ClickHouse/ClickHouse/pull/60224) ([Max K.](https://github.com/maxknv)).
* Slightly better Keeper loading from snapshot [#60226](https://github.com/ClickHouse/ClickHouse/pull/60226) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix: IAST::clone() for RENAME [#60227](https://github.com/ClickHouse/ClickHouse/pull/60227) ([Igor Nikonov](https://github.com/devcrafter)).
* Treat 2+ in allow_experimental_parallel_reading_from_replicas as 2 [#60228](https://github.com/ClickHouse/ClickHouse/pull/60228) ([Raúl Marín](https://github.com/Algunenano)).
* CI: random job pick support [#60229](https://github.com/ClickHouse/ClickHouse/pull/60229) ([Max K.](https://github.com/maxknv)).
* Fix analyzer - hide arguments for secret functions [#60230](https://github.com/ClickHouse/ClickHouse/pull/60230) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Backups delete suspicious file [#60231](https://github.com/ClickHouse/ClickHouse/pull/60231) ([Maksim Kita](https://github.com/kitaisreal)).
* CI: random sanitizer for parallel repl in PR wf [#60234](https://github.com/ClickHouse/ClickHouse/pull/60234) ([Max K.](https://github.com/maxknv)).
* CI: use aarch runner for runconfig job [#60236](https://github.com/ClickHouse/ClickHouse/pull/60236) ([Max K.](https://github.com/maxknv)).
* Add test for 60232 [#60244](https://github.com/ClickHouse/ClickHouse/pull/60244) ([Raúl Marín](https://github.com/Algunenano)).
* Make cloud sync required [#60245](https://github.com/ClickHouse/ClickHouse/pull/60245) ([Raúl Marín](https://github.com/Algunenano)).
* Tests from [#60094](https://github.com/ClickHouse/ClickHouse/issues/60094) [#60256](https://github.com/ClickHouse/ClickHouse/pull/60256) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove bad check in Keeper [#60266](https://github.com/ClickHouse/ClickHouse/pull/60266) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix integration `test_backup_restore_s3` [#60269](https://github.com/ClickHouse/ClickHouse/pull/60269) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Ignore valid 'No such key errors' in stress tests [#60270](https://github.com/ClickHouse/ClickHouse/pull/60270) ([Raúl Marín](https://github.com/Algunenano)).
* Stress test: Include the first sanitizer block message in the report [#60283](https://github.com/ClickHouse/ClickHouse/pull/60283) ([Raúl Marín](https://github.com/Algunenano)).
* Update analyzer_tech_debt.txt [#60303](https://github.com/ClickHouse/ClickHouse/pull/60303) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Minor fixes for hashed dictionary [#60310](https://github.com/ClickHouse/ClickHouse/pull/60310) ([vdimir](https://github.com/vdimir)).
* Install tailscale during AMI build and set it up on runners [#60316](https://github.com/ClickHouse/ClickHouse/pull/60316) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* CI: remove Integration tests asan and release from PR wf [#60327](https://github.com/ClickHouse/ClickHouse/pull/60327) ([Max K.](https://github.com/maxknv)).
* Fix - analyzer related - "executable" function subquery arguments. [#60339](https://github.com/ClickHouse/ClickHouse/pull/60339) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Update settings.md to correct the description for setting `max_concurrent_queries_for_user` [#60343](https://github.com/ClickHouse/ClickHouse/pull/60343) ([Alex Cheng](https://github.com/Alex-Cheng)).
* Fix rapidjson submodule [#60346](https://github.com/ClickHouse/ClickHouse/pull/60346) ([Raúl Marín](https://github.com/Algunenano)).
* Validate experimental and suspicious types inside nested types under a setting [#60353](https://github.com/ClickHouse/ClickHouse/pull/60353) ([Kruglov Pavel](https://github.com/Avogar)).
* Update 01158_zookeeper_log_long.sql [#60357](https://github.com/ClickHouse/ClickHouse/pull/60357) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Add missed #include <mutex> [#60358](https://github.com/ClickHouse/ClickHouse/pull/60358) ([Mikhnenko Sasha](https://github.com/4JustMe4)).
* Follow up [#60082](https://github.com/ClickHouse/ClickHouse/issues/60082) [#60360](https://github.com/ClickHouse/ClickHouse/pull/60360) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Remove ALTER LIVE VIEW [#60370](https://github.com/ClickHouse/ClickHouse/pull/60370) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
* Expose fatal.log separately for fuzzer [#60374](https://github.com/ClickHouse/ClickHouse/pull/60374) ([Azat Khuzhin](https://github.com/azat)).
* Minor changes for dashboard [#60387](https://github.com/ClickHouse/ClickHouse/pull/60387) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove unused method [#60388](https://github.com/ClickHouse/ClickHouse/pull/60388) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow to map UI handlers to different paths [#60389](https://github.com/ClickHouse/ClickHouse/pull/60389) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove old tags from integration tests [#60407](https://github.com/ClickHouse/ClickHouse/pull/60407) ([Raúl Marín](https://github.com/Algunenano)).
* Update `liburing` to 2.5 [#60409](https://github.com/ClickHouse/ClickHouse/pull/60409) ([Nikita Taranov](https://github.com/nickitat)).
* Fix undefined-behavior in case of too big max_execution_time setting [#60419](https://github.com/ClickHouse/ClickHouse/pull/60419) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix wrong log message in Fuzzer [#60425](https://github.com/ClickHouse/ClickHouse/pull/60425) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix unrestricted reads from keeper [#60429](https://github.com/ClickHouse/ClickHouse/pull/60429) ([Raúl Marín](https://github.com/Algunenano)).
* Split update_mergeable_check into two functions to force trigger the status [#60431](https://github.com/ClickHouse/ClickHouse/pull/60431) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Revert "Revert "Add table function `mergeTreeIndex`"" [#60435](https://github.com/ClickHouse/ClickHouse/pull/60435) ([Anton Popov](https://github.com/CurtizJ)).
* Revert "Merge pull request [#56864](https://github.com/ClickHouse/ClickHouse/issues/56864) from ClickHouse/broken-projections-better-handling" [#60436](https://github.com/ClickHouse/ClickHouse/pull/60436) ([Nikita Taranov](https://github.com/nickitat)).
* Keeper: fix moving changelog files between disks [#60442](https://github.com/ClickHouse/ClickHouse/pull/60442) ([Antonio Andelic](https://github.com/antonio2368)).
* Replace deprecated distutils by vendored packaging [#60444](https://github.com/ClickHouse/ClickHouse/pull/60444) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Do not fail the build if ci-logs is not healthy [#60445](https://github.com/ClickHouse/ClickHouse/pull/60445) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Move setting `enable_order_by_all` out of the experimental setting section [#60449](https://github.com/ClickHouse/ClickHouse/pull/60449) ([Robert Schulze](https://github.com/rschu1ze)).
* Minor: Replace `boost::algorithm::starts_with()` by `std::string::starts_with()` [#60450](https://github.com/ClickHouse/ClickHouse/pull/60450) ([Robert Schulze](https://github.com/rschu1ze)).
* Minor: Replace boost::algorithm::ends_with() by std::string::ends_with() [#60454](https://github.com/ClickHouse/ClickHouse/pull/60454) ([Robert Schulze](https://github.com/rschu1ze)).
* CI: remove input params for job scripts [#60455](https://github.com/ClickHouse/ClickHouse/pull/60455) ([Max K.](https://github.com/maxknv)).
* Fix: 02496_remove_redundant_sorting_analyzer [#60456](https://github.com/ClickHouse/ClickHouse/pull/60456) ([Igor Nikonov](https://github.com/devcrafter)).
* PR template fix to include ci fix category [#60461](https://github.com/ClickHouse/ClickHouse/pull/60461) ([Max K.](https://github.com/maxknv)).
* Reduce iterations in 01383_log_broken_table [#60465](https://github.com/ClickHouse/ClickHouse/pull/60465) ([Raúl Marín](https://github.com/Algunenano)).
* Merge [#57434](https://github.com/ClickHouse/ClickHouse/issues/57434) [#60466](https://github.com/ClickHouse/ClickHouse/pull/60466) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix bad test: looks like an obvious race condition, but I didn't check in detail. [#60471](https://github.com/ClickHouse/ClickHouse/pull/60471) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Make test slower [#60472](https://github.com/ClickHouse/ClickHouse/pull/60472) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix cgroups v1 rss parsing in CgroupsMemoryUsageObserver [#60481](https://github.com/ClickHouse/ClickHouse/pull/60481) ([Maksim Kita](https://github.com/kitaisreal)).
* CI: fix pr check status to not fail mergeable check [#60483](https://github.com/ClickHouse/ClickHouse/pull/60483) ([Max K.](https://github.com/maxknv)).
* Report respects skipped builds [#60488](https://github.com/ClickHouse/ClickHouse/pull/60488) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* CI: quick style fix [#60490](https://github.com/ClickHouse/ClickHouse/pull/60490) ([Max K.](https://github.com/maxknv)).
* Decrease logging level for http retriable errors to Info [#60508](https://github.com/ClickHouse/ClickHouse/pull/60508) ([Raúl Marín](https://github.com/Algunenano)).
* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)).

View File

@ -276,5 +276,3 @@ Besides, each replica stores its state in ZooKeeper as the set of parts and its
:::note
The ClickHouse cluster consists of independent shards, and each shard consists of replicas. The cluster is **not elastic**, so after adding a new shard, data is not rebalanced between shards automatically. Instead, the cluster load is supposed to be adjusted to be uneven. This implementation gives you more control, and it is ok for relatively small clusters, such as tens of nodes. But for clusters with hundreds of nodes that we are using in production, this approach becomes a significant drawback. We should implement a table engine that spans across the cluster with dynamically replicated regions that could be split and balanced between clusters automatically.
:::
[Original article](https://clickhouse.com/docs/en/development/architecture/)

View File

@ -14,20 +14,6 @@ Supported platforms:
- PowerPC 64 LE (experimental)
- RISC-V 64 (experimental)
## Building in docker
We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. There is a script `docker/packager/packager` to ease the image usage:
```bash
# define a directory for the output artifacts
output_dir="build_results"
# a simplest build
./docker/packager/packager --package-type=binary --output-dir "$output_dir"
# build debian packages
./docker/packager/packager --package-type=deb --output-dir "$output_dir"
# by default, debian packages use thin LTO, so we can override it to speed up the build
CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "./$(git rev-parse --show-cdup)/build_results"
```
## Building on Ubuntu
The following tutorial is based on Ubuntu Linux.
@ -37,6 +23,7 @@ The minimum recommended Ubuntu version for development is 22.04 LTS.
### Install Prerequisites {#install-prerequisites}
``` bash
sudo apt-get update
sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk lsb-release wget software-properties-common gnupg
```
@ -57,7 +44,7 @@ sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
For other Linux distributions - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html).
As of August 2023, clang-16 or higher will work.
As of March 2024, clang-17 or higher will work.
GCC as a compiler is not supported.
To build with a specific Clang version:
@ -67,8 +54,8 @@ to see what version you have installed before setting this environment variable.
:::
``` bash
export CC=clang-17
export CXX=clang++-17
export CC=clang-18
export CXX=clang++-18
```
### Checkout ClickHouse Sources {#checkout-clickhouse-sources}
@ -133,3 +120,17 @@ mkdir build
cmake -S . -B build
cmake --build build
```
## Building in docker
We use the docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. There is a script `docker/packager/packager` to ease the image usage:
```bash
# define a directory for the output artifacts
output_dir="build_results"
# a simplest build
./docker/packager/packager --package-type=binary --output-dir "$output_dir"
# build debian packages
./docker/packager/packager --package-type=deb --output-dir "$output_dir"
# by default, debian packages use thin LTO, so we can override it to speed up the build
CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "./$(git rev-parse --show-cdup)/build_results"
```

View File

@ -28,7 +28,7 @@ The CSV file has 10 columns:
```response
┌─name─────────────────┬─type─────────────┐
│ date │ Nullable(String)
│ date │ Nullable(Date)
│ location_key │ Nullable(String) │
│ new_confirmed │ Nullable(Int64) │
│ new_deceased │ Nullable(Int64) │

View File

@ -23,7 +23,6 @@ As of November 8th, 2022, each TSV is approximately the following size and numbe
# Table of Contents
- [ClickHouse GitHub data](#clickhouse-github-data)
- [Table of Contents](#table-of-contents)
- [Generating the data](#generating-the-data)
- [Downloading and inserting the data](#downloading-and-inserting-the-data)

View File

@ -7,6 +7,7 @@ title: Formats for Input and Output Data
ClickHouse can accept and return data in various formats. A format supported for input can be used to parse the data provided to `INSERT`s, to perform `SELECT`s from a file-backed table such as File, URL or HDFS, or to read a dictionary. A format supported for output can be used to arrange the
results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
All format names are case insensitive.
The supported formats are:

View File

@ -69,5 +69,3 @@ psql "port=9005 host=127.0.0.1 user=alice dbname=default sslcert=/path/to/certif
```
View the [PostgreSQL docs](https://jdbc.postgresql.org/documentation/head/ssl-client.html) for more details on their SSL settings.
[Original article](https://clickhouse.com/docs/en/interfaces/postgresql)

View File

@ -549,6 +549,48 @@ Result:
└───────┴─────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
##### input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects
Enabling this setting allows to use String type for ambiguous paths during named tuples inference from JSON objects (when `input_format_json_try_infer_named_tuples_from_objects` is enabled) instead of an exception.
It allows to read JSON objects as named Tuples even if there are ambiguous paths.
Disabled by default.
**Examples**
With disabled setting:
```sql
SET input_format_json_try_infer_named_tuples_from_objects = 1;
SET input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects = 0;
DESC format(JSONEachRow, '{"obj" : {"a" : 42}}, {"obj" : {"a" : {"b" : "Hello"}}}');
```
Result:
```text
Code: 636. DB::Exception: The table structure cannot be extracted from a JSONEachRow format file. Error:
Code: 117. DB::Exception: JSON objects have ambiguous data: in some objects path 'a' has type 'Int64' and in some - 'Tuple(b String)'. You can enable setting input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects to use String type for path 'a'. (INCORRECT_DATA) (version 24.3.1.1).
You can specify the structure manually. (CANNOT_EXTRACT_TABLE_STRUCTURE)
```
With enabled setting:
```sql
SET input_format_json_try_infer_named_tuples_from_objects = 1;
SET input_format_json_use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects = 1;
DESC format(JSONEachRow, '{"obj" : "a" : 42}, {"obj" : {"a" : {"b" : "Hello"}}}');
SELECT * FROM format(JSONEachRow, '{"obj" : {"a" : 42}}, {"obj" : {"a" : {"b" : "Hello"}}}');
```
Result:
```text
┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ obj │ Tuple(a Nullable(String)) │ │ │ │ │ │
└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
┌─obj─────────────────┐
│ ('42') │
│ ('{"b" : "Hello"}') │
└─────────────────────┘
```
##### input_format_json_read_objects_as_strings
Enabling this setting allows reading nested JSON objects as strings.
@ -1554,6 +1596,28 @@ DESC format(JSONEachRow, $$
└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
#### input_format_try_infer_exponent_floats
If enabled, ClickHouse will try to infer floats in exponential form for text formats (except JSON where numbers in exponential form are always inferred).
Disabled by default.
**Example**
```sql
SET input_format_try_infer_exponent_floats = 1;
DESC format(CSV,
$$1.1E10
2.3e-12
42E00
$$)
```
```response
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ c1 │ Nullable(Float64) │ │ │ │ │ │
└──────┴───────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
## Self describing formats {#self-describing-formats}
Self-describing formats contain information about the structure of the data in the data itself,

View File

@ -275,6 +275,16 @@ Cache profile events:
- `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds`
## Using in-memory cache (userspace page cache) {#userspace-page-cache}
The File Cache described above stores cached data in local files. Alternatively, object-store-based disks can be configured to use "Userspace Page Cache", which is RAM-only. Userspace page cache is recommended only if file cache can't be used for some reason, e.g. if the machine doesn't have a local disk at all. Note that file cache effectively uses RAM for caching too, since the OS caches contents of local files.
To enable userspace page cache for disks that don't use file cache, use setting `use_page_cache_for_disks_without_file_cache`.
By default, on Linux, the userspace page cache will use all available memory, similar to the OS page cache. In tools like `top` and `ps`, the clickhouse server process will typically show resident set size near 100% of the machine's RAM - this is normal, and most of this memory is actually reclaimable by the OS on memory pressure (`MADV_FREE`). This behavior can be disabled with server setting `page_cache_use_madv_free = 0`, making the userspace page cache just use a fixed amount of memory `page_cache_size` with no special interaction with the OS. On Mac OS, `page_cache_use_madv_free` is always disabled as it doesn't have lazy `MADV_FREE`.
Unfortunately, `page_cache_use_madv_free` makes it difficult to tell if the server is close to running out of memory, since the RSS metric becomes useless. Async metric `UnreclaimableRSS` shows the amount of physical memory used by the server, excluding the memory reclaimable by the OS: `select value from system.asynchronous_metrics where metric = 'UnreclaimableRSS'`. Use it for monitoring instead of RSS. This metric is only available if `page_cache_use_madv_free` is enabled.
## Storing Data on Web Server {#storing-data-on-webserver}
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.

View File

@ -49,5 +49,3 @@ build_id:
**See also**
- [trace_log](../../operations/system-tables/trace_log.md) system table
[Original article](https://clickhouse.com/docs/en/operations/system-tables/crash-log)

View File

@ -297,8 +297,6 @@ end script
If you use antivirus software configure it to skip folders with ClickHouse datafiles (`/var/lib/clickhouse`) otherwise performance may be reduced and you may experience unexpected errors during data ingestion and background merges.
[Original article](https://clickhouse.com/docs/en/operations/tips/)
## Related Content
- [Getting started with ClickHouse? Here are 13 "Deadly Sins" and how to avoid them](https://clickhouse.com/blog/common-getting-started-issues-with-clickhouse)

View File

@ -0,0 +1,55 @@
---
slug: /en/sql-reference/aggregate-functions/reference/approxtopk
sidebar_position: 212
---
# approx_top_k
Returns an array of the approximately most frequent values and their counts in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves).
``` sql
approx_top_k(N)(column)
approx_top_k(N, reserved)(column)
```
This function does not provide a guaranteed result. In certain situations, errors might occur and it might return frequent values that arent the most frequent values.
We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`.
**Parameters**
- `N` — The number of elements to return. Optional. Default value: 10.
- `reserved` — Defines, how many cells reserved for values. If uniq(column) > reserved, result of topK function will be approximate. Optional. Default value: N * 3.
**Arguments**
- `column` — The value to calculate frequency.
**Example**
Query:
``` sql
SELECT approx_top_k(2)(k)
FROM VALUES('k Char, w UInt64', ('y', 1), ('y', 1), ('x', 5), ('y', 1), ('z', 10));
```
Result:
``` text
┌─approx_top_k(2)(k)────┐
│ [('y',3,0),('x',1,0)] │
└───────────────────────┘
```
# approx_top_count
Is an alias to `approx_top_k` function
**See Also**
- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
- [approx_top_sum](../../../sql-reference/aggregate-functions/reference/approxtopsum.md)

View File

@ -0,0 +1,51 @@
---
slug: /en/sql-reference/aggregate-functions/reference/approxtopsum
sidebar_position: 212
---
# approx_top_sum
Returns an array of the approximately most frequent values and their counts in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves). Additionally, the weight of the value is taken into account.
``` sql
approx_top_sum(N)(column, weight)
approx_top_sum(N, reserved)(column, weight)
```
This function does not provide a guaranteed result. In certain situations, errors might occur and it might return frequent values that arent the most frequent values.
We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`.
**Parameters**
- `N` — The number of elements to return. Optional. Default value: 10.
- `reserved` — Defines, how many cells reserved for values. If uniq(column) > reserved, result of topK function will be approximate. Optional. Default value: N * 3.
**Arguments**
- `column` — The value to calculate frequency.
- `weight` — The weight. Every value is accounted `weight` times for frequency calculation. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT approx_top_sum(2)(k, w)
FROM VALUES('k Char, w UInt64', ('y', 1), ('y', 1), ('x', 5), ('y', 1), ('z', 10))
```
Result:
``` text
┌─approx_top_sum(2)(k, w)─┐
│ [('z',10,0),('x',5,0)] │
└─────────────────────────┘
```
**See Also**
- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
- [approx_top_k](../../../sql-reference/aggregate-functions/reference/approxtopk.md)

View File

@ -11,21 +11,23 @@ Implements the [Filtered Space-Saving](https://doi.org/10.1016/j.ins.2010.08.024
``` sql
topK(N)(column)
topK(N, load_factor)(column)
topK(N, load_factor, 'counts')(column)
```
This function does not provide a guaranteed result. In certain situations, errors might occur and it might return frequent values that arent the most frequent values.
We recommend using the `N < 10` value; performance is reduced with large `N` values. Maximum value of `N = 65536`.
**Parameters**
- `N` — The number of elements to return. Optional. Default value: 10.
- `load_factor` — Defines, how many cells reserved for values. If uniq(column) > N * load_factor, result of topK function will be approximate. Optional. Default value: 3.
- `counts` — Defines, should result contain approximate count and error value.
**Arguments**
- `N` The number of elements to return.
If the parameter is omitted, default value 10 is used.
**Arguments**
- `x` The value to calculate frequency.
- `column` — The value to calculate frequency.
**Example**
@ -41,3 +43,9 @@ FROM ontime
│ [19393,19790,19805] │
└─────────────────────┘
```
**See Also**
- [topKWeighted](../../../sql-reference/aggregate-functions/reference/topkweighted.md)
- [approx_top_k](../../../sql-reference/aggregate-functions/reference/approxtopk.md)
- [approx_top_sum](../../../sql-reference/aggregate-functions/reference/approxtopsum.md)

View File

@ -10,13 +10,20 @@ Returns an array of the approximately most frequent values in the specified colu
**Syntax**
``` sql
topKWeighted(N)(x, weight)
topKWeighted(N)(column, weight)
topKWeighted(N, load_factor)(column, weight)
topKWeighted(N, load_factor, 'counts')(column, weight)
```
**Parameters**
- `N` — The number of elements to return. Optional. Default value: 10.
- `load_factor` — Defines, how many cells reserved for values. If uniq(column) > N * load_factor, result of topK function will be approximate. Optional. Default value: 3.
- `counts` — Defines, should result contain approximate count and error value.
**Arguments**
- `N` — The number of elements to return.
- `x` — The value.
- `column` — The value.
- `weight` — The weight. Every value is accounted `weight` times for frequency calculation. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned value**
@ -40,6 +47,23 @@ Result:
└────────────────────────┘
```
Query:
``` sql
SELECT topKWeighted(2, 10, 'counts')(k, w)
FROM VALUES('k Char, w UInt64', ('y', 1), ('y', 1), ('x', 5), ('y', 1), ('z', 10))
```
Result:
``` text
┌─topKWeighted(2, 10, 'counts')(k, w)─┐
│ [('z',10,0),('x',5,0)] │
└─────────────────────────────────────┘
```
**See Also**
- [topK](../../../sql-reference/aggregate-functions/reference/topk.md)
- [approx_top_k](../../../sql-reference/aggregate-functions/reference/approxtopk.md)
- [approx_top_sum](../../../sql-reference/aggregate-functions/reference/approxtopsum.md)

View File

@ -394,7 +394,8 @@ Result:
## toYear
Returns the year component (AD) of a date or date with time.
Converts a date or date with time to the year number (AD) as `UInt16` value.
**Syntax**
@ -430,7 +431,7 @@ Result:
## toQuarter
Returns the quarter (1-4) of a date or date with time.
Converts a date or date with time to the quarter number (1-4) as `UInt8` value.
**Syntax**
@ -464,9 +465,10 @@ Result:
└──────────────────────────────────────────────┘
```
## toMonth
Returns the month component (1-12) of a date or date with time.
Converts a date or date with time to the month number (1-12) as `UInt8` value.
**Syntax**
@ -502,7 +504,7 @@ Result:
## toDayOfYear
Returns the number of the day within the year (1-366) of a date or date with time.
Converts a date or date with time to the number of the day of the year (1-366) as `UInt16` value.
**Syntax**
@ -538,7 +540,7 @@ Result:
## toDayOfMonth
Returns the number of the day within the month (1-31) of a date or date with time.
Converts a date or date with time to the number of the day in the month (1-31) as `UInt8` value.
**Syntax**
@ -574,7 +576,7 @@ Result:
## toDayOfWeek
Returns the number of the day within the week of a date or date with time.
Converts a date or date with time to the number of the day in the week as `UInt8` value.
The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is omitted, the default mode is 0. The time zone of the date can be specified as the third argument.
@ -625,7 +627,7 @@ Result:
## toHour
Returns the hour component (0-24) of a date with time.
Converts a date with time to the number of the hour in 24-hour time (0-23) as `UInt8` value.
Assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always exactly when it occurs - it depends on the timezone).
@ -639,7 +641,7 @@ Alias: `HOUR`
**Arguments**
- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
@ -663,7 +665,7 @@ Result:
## toMinute
Returns the minute component (0-59) a date with time.
Converts a date with time to the number of the minute of the hour (0-59) as `UInt8` value.
**Syntax**
@ -675,7 +677,7 @@ Alias: `MINUTE`
**Arguments**
- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
@ -699,7 +701,7 @@ Result:
## toSecond
Returns the second component (0-59) of a date with time. Leap seconds are not considered.
Converts a date with time to the second in the minute (0-59) as `UInt8` value. Leap seconds are not considered.
**Syntax**
@ -711,7 +713,7 @@ Alias: `SECOND`
**Arguments**
- `value` - a [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
- `value` - a [Date](../data-types/date.md), [Date32](../data-types/date32.md), [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
**Returned value**
@ -733,40 +735,6 @@ Result:
└─────────────────────────────────────────────┘
```
## toMillisecond
Returns the millisecond component (0-999) of a date with time.
**Syntax**
```sql
toMillisecond(value)
```
*Arguments**
- `value` - [DateTime](../data-types/datetime.md) or [DateTime64](../data-types/datetime64.md)
Alias: `MILLISECOND`
```sql
SELECT toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3))
```
Result:
```response
┌──toMillisecond(toDateTime64('2023-04-21 10:20:30.456', 3))─┐
│ 456 │
└────────────────────────────────────────────────────────────┘
```
**Returned value**
- The millisecond in the minute (0 - 59) of the given date/time
Type: `UInt16`
## toUnixTimestamp
Converts a string, a date or a date with time to the [Unix Timestamp](https://en.wikipedia.org/wiki/Unix_time) in `UInt32` representation.
@ -2319,10 +2287,43 @@ Result:
## today {#today}
Accepts zero arguments and returns the current date at one of the moments of query analysis.
The same as toDate(now()).
Returns the current date at moment of query analysis. It is the same as toDate(now()) and has aliases: `curdate`, `current_date`.
Aliases: `curdate`, `current_date`.
**Syntax**
```sql
today()
```
**Arguments**
- None
**Returned value**
- Current date
Type: [DateTime](../../sql-reference/data-types/datetime.md).
**Example**
Query:
```sql
SELECT today() AS today, curdate() AS curdate, current_date() AS current_date FORMAT Pretty
```
**Result**:
Running the query above on the 3rd of March 2024 would have returned the following response:
```response
┏━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━┓
┃ today ┃ curdate ┃ current_date ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━┩
│ 2024-03-03 │ 2024-03-03 │ 2024-03-03 │
└────────────┴────────────┴──────────────┘
```
## yesterday {#yesterday}
@ -3066,6 +3067,40 @@ Result:
│ 2023-03-16 18:00:00.000 │
└─────────────────────────────────────────────────────────────────────────┘
```
## timeDiff
Returns the difference between two dates or dates with time values. The difference is calculated in units of seconds. It is same as `dateDiff` and was added only for MySQL support. `dateDiff` is preferred.
**Syntax**
```sql
timeDiff(first_datetime, second_datetime)
```
*Arguments**
- `first_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
- `second_datetime` — A DateTime/DateTime64 type const value or an expression . [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
**Returned value**
The difference between two dates or dates with time values in seconds.
**Example**
Query:
```sql
timeDiff(toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02'));
```
**Result**:
```response
┌─timeDiff(toDateTime64('1927-01-01 00:00:00', 3), toDate32('1927-01-02'))─┐
│ 86400 │
└──────────────────────────────────────────────────────────────────────────┘
```
## Related content

View File

@ -22,8 +22,8 @@ seriesOutliersDetectTukey(series, min_percentile, max_percentile, K);
**Arguments**
- `series` - An array of numeric values.
- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [2,98]. The default is 25.
- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range (IQR). The value must be in range [2,98]. The default is 75.
- `min_percentile` - The minimum percentile to be used to calculate inter-quantile range [(IQR)](https://en.wikipedia.org/wiki/Interquartile_range). The value must be in range [0.02,0.98]. The default is 0.25.
- `max_percentile` - The maximum percentile to be used to calculate inter-quantile range (IQR). The value must be in range [0.02,0.98]. The default is 0.75.
- `K` - Non-negative constant value to detect mild or stronger outliers. The default value is 1.5.
At least four data points are required in `series` to detect outliers.
@ -53,7 +53,7 @@ Result:
Query:
``` sql
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 20, 80, 1.5) AS print_0;
SELECT seriesOutliersDetectTukey([-3, 2, 15, 3, 5, 6, 4.50, 5, 12, 45, 12, 3.40, 3, 4, 5, 6], 0.2, 0.8, 1.5) AS print_0;
```
Result:

View File

@ -83,6 +83,3 @@ Queries that are parts of `UNION/UNION ALL/UNION DISTINCT` can be run simultaneo
- [insert_null_as_default](../../../operations/settings/settings.md#insert_null_as_default) setting.
- [union_default_mode](../../../operations/settings/settings.md#union-default-mode) setting.
[Original article](https://clickhouse.com/docs/en/sql-reference/statements/select/union/) <!-- hide -->

View File

@ -1,4 +1,4 @@
---
---
slug: /zh/getting-started/example-datasets/opensky
sidebar_label: 空中交通数据
description: 该数据集中的数据是从完整的 OpenSky 数据集中衍生而来的,对其中的数据进行了必要的清理,用以展示在 COVID-19 期间空中交通的发展。
@ -53,12 +53,12 @@ CREATE TABLE opensky
ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"'
```
- 这里我们将文件列表(`ls -1 flightlist_*.csv.gz`)传递给`xargs`以进行并行处理。 `xargs -P100` 指定最多使用 100 个并行工作程序,但由于我们只有 30 个文件,工作程序的数量将只有 30 个。
- 对于每个文件,`xargs` 将通过 `bash -c` 为每个文件运行一个脚本文件。该脚本通过使用 `{}` 表示文件名占位符,然后 `xargs` 由命令进行填充(使用 `-I{}`)。
- 该脚本会将文件 (`gzip -c -d "{}"`) 解压缩到标准输出(`-c` 参数),并将输出重定向到 `clickhouse-client`。
- 我们还要求使用扩展解析器解析 [DateTime](../../sql-reference/data-types/datetime.md) 字段 ([--date_time_input_format best_effort](../../operations/settings/ settings.md#settings-date_time_input_format)) 以识别具有时区偏移的 ISO-8601 格式。
- 这里我们将文件列表(`ls -1 flightlist_*.csv.gz`)传递给`xargs`以进行并行处理。 `xargs -P100` 指定最多使用 100 个并行工作程序,但由于我们只有 30 个文件,工作程序的数量将只有 30 个。
- 对于每个文件,`xargs` 将通过 `bash -c` 为每个文件运行一个脚本文件。该脚本通过使用 `{}` 表示文件名占位符,然后 `xargs` 由命令进行填充(使用 `-I{}`)。
- 该脚本会将文件 (`gzip -c -d "{}"`) 解压缩到标准输出(`-c` 参数),并将输出重定向到 `clickhouse-client`。
- 我们还要求使用扩展解析器解析 [DateTime](/docs/zh/sql-reference/data-types/datetime.md) 字段 ([--date_time_input_format best_effort](/docs/zh/operations/settings/settings.md#settings-date_time_input_format)) 以识别具有时区偏移的 ISO-8601 格式。
最后,`clickhouse-client` 会以 [CSVWithNames](../../interfaces/formats.md#csvwithnames) 格式读取输入数据然后执行插入。
最后,`clickhouse-client` 会以 [CSVWithNames](/docs/zh/interfaces/formats.md#csvwithnames) 格式读取输入数据然后执行插入。
并行导入需要 24 秒。

View File

@ -567,10 +567,6 @@ public:
}
#ifndef __clang__
#pragma GCC optimize("-fno-var-tracking-assignments")
#endif
int mainEntryClickHouseBenchmark(int argc, char ** argv)
{
using namespace DB;

View File

@ -51,10 +51,6 @@
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Formats/registerFormats.h>
#ifndef __clang__
#pragma GCC optimize("-fno-var-tracking-assignments")
#endif
namespace fs = std::filesystem;
using namespace std::literals;

View File

@ -4,6 +4,7 @@
#include <Coordination/CoordinationSettings.h>
#include <Coordination/KeeperSnapshotManager.h>
#include <Coordination/ZooKeeperDataReader.h>
#include <Coordination/KeeperContext.h>
#include <Common/TerminalSize.h>
#include <Poco/ConsoleChannel.h>
#include <Poco/AutoPtr.h>

View File

@ -1228,6 +1228,13 @@ try
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);
size_t page_cache_size = server_settings.page_cache_size;
if (page_cache_size != 0)
global_context->setPageCache(
server_settings.page_cache_chunk_size, server_settings.page_cache_mmap_size,
page_cache_size, server_settings.page_cache_use_madv_free,
server_settings.page_cache_use_transparent_huge_pages);
String index_uncompressed_cache_policy = server_settings.index_uncompressed_cache_policy;
size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size;
double index_uncompressed_cache_size_ratio = server_settings.index_uncompressed_cache_size_ratio;
@ -1874,7 +1881,6 @@ try
{
total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size);
}
}
#endif
@ -1889,10 +1895,6 @@ try
" when two different stack unwinding methods will interfere with each other.");
#endif
#if !defined(__x86_64__)
LOG_INFO(log, "Query Profiler and TraceCollector is only tested on x86_64. It also known to not work under qemu-user.");
#endif
if (!hasPHDRCache())
LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they require PHDR cache to be created"
" (otherwise the function 'dl_iterate_phdr' is not lock free and not async-signal safe).");

View File

@ -0,0 +1 @@
../../../tests/config/config.d/handlers.yaml

View File

@ -752,7 +752,7 @@
<!-- Comma-separated list of prefixes for user-defined settings.
The server will allow to set these settings, and retrieve them with the getSetting function.
They are also logged in the query_log, similarly to other settings, but have no special effect.
The "SQL_" prefix is introduced for compatibility with MySQL - these settings are being set be Tableau.
The "SQL_" prefix is introduced for compatibility with MySQL - these settings are being set by Tableau.
-->
<custom_settings_prefixes>SQL_</custom_settings_prefixes>
@ -1477,17 +1477,23 @@
</query_masking_rules> -->
<!-- Uncomment to use custom http handlers.
rules are checked from top to bottom, first match runs the handler
url - to match request URL, you can use 'regex:' prefix to use regex match(optional)
empty_query_string - check that there is no query string in the URL
methods - to match request method, you can use commas to separate multiple method matches(optional)
headers - to match request headers, match each child element(child element name is header name), you can use 'regex:' prefix to use regex match(optional)
handler is request handler
type - supported types: static, dynamic_query_handler, predefined_query_handler
type - supported types: static, dynamic_query_handler, predefined_query_handler, redirect
query - use with predefined_query_handler type, executes query when the handler is called
query_param_name - use with dynamic_query_handler type, extracts and executes the value corresponding to the <query_param_name> value in HTTP request params
status - use with static type, response status code
content_type - use with static type, response content-type
response_content - use with static type, Response content sent to client, when using the prefix 'file://' or 'config://', find the content from the file or configuration send to client.
url - a location for redirect
Along with a list of rules, you can specify <defaults/> which means - enable all the default handlers.
<http_handlers>
<rule>

View File

@ -9,7 +9,8 @@
<style>
:root {
--color: black;
--background: linear-gradient(to bottom, #00CCFF, #00D0D0);
--background-color-1: #00CCFF;
--background: linear-gradient(to bottom, var(--background-color-1), #00D0D0);
--chart-background: white;
--shadow-color: rgba(0, 0, 0, 0.25);
--moving-shadow-color: rgba(0, 0, 0, 0.5);
@ -33,7 +34,8 @@
[data-theme="dark"] {
--color: white;
--background: #151C2C;
--background-color-1: #151C2C;
--background: var(--background-color-1);
--chart-background: #1b2834;
--shadow-color: rgba(0, 0, 0, 0);
--moving-shadow-color: rgba(255, 255, 255, 0.25);
@ -119,6 +121,18 @@
display: flex;
flex-flow: column nowrap;
justify-content: center;
position: sticky;
top: -1rem;
margin-top: -1rem;
margin-left: -1rem;
margin-right: -1rem;
border-top: 1rem solid var(--background-color-1);
border-left: 1rem solid var(--background-color-1);
border-right: 1rem solid var(--background-color-1);
box-sizing: content-box;
z-index: 1000;
background: var(--background-color-1);
}
.inputs.unconnected {
@ -523,49 +537,14 @@ let default_params = {
let params = default_params;
/// Palette generation for charts
function generatePalette(baseColor, numColors) {
const baseHSL = hexToHsl(baseColor);
const hueStep = 360 / numColors;
const palette = [];
function generatePalette(numColors) {
palette = [];
for (let i = 0; i < numColors; i++) {
const hue = Math.round((baseHSL.h + i * hueStep) % 360);
const color = `hsl(${hue}, ${baseHSL.s}%, ${baseHSL.l}%)`;
palette.push(color);
palette.push(`oklch(${theme != 'dark' ? 0.75 : 0.5}, 0.15, ${360 * i / numColors})`);
}
return palette;
}
/// Helper function to convert hex color to HSL
function hexToHsl(hex) {
hex = hex.replace(/^#/, '');
const bigint = parseInt(hex, 16);
const r = (bigint >> 16) & 255;
const g = (bigint >> 8) & 255;
const b = bigint & 255;
const r_norm = r / 255;
const g_norm = g / 255;
const b_norm = b / 255;
const max = Math.max(r_norm, g_norm, b_norm);
const min = Math.min(r_norm, g_norm, b_norm);
const l = (max + min) / 2;
let s = 0;
if (max !== min) {
s = l > 0.5 ? (max - min) / (2 - max - min) : (max - min) / (max + min);
}
let h = 0;
if (max !== min) {
if (max === r_norm) {
h = (g_norm - b_norm) / (max - min) + (g_norm < b_norm ? 6 : 0);
} else if (max === g_norm) {
h = (b_norm - r_norm) / (max - min) + 2;
} else {
h = (r_norm - g_norm) / (max - min) + 4;
}
}
h = Math.round(h * 60);
return { h, s: Math.round(s * 100), l: Math.round(l * 100) };
}
let theme = 'light';
function setTheme(new_theme) {
@ -1207,7 +1186,7 @@ async function draw(idx, chart, url_params, query) {
// Treat every column as series
const series_count = reply.meta.length;
const fill = series_count == 2 ? fill_color : undefined;
const palette = generatePalette(line_color, series_count);
const palette = series_count == 2 ? [line_color] : generatePalette(series_count);
let max_value = Number.NEGATIVE_INFINITY;
for (let i = 1; i < series_count; i++) {
let label = reply.meta[i].name;

230
rust/Cargo.lock generated
View File

@ -6,7 +6,7 @@ version = 3
name = "_ch_rust_prql"
version = "0.1.0"
dependencies = [
"prql-compiler",
"prqlc",
"serde_json",
]
@ -79,16 +79,15 @@ dependencies = [
[[package]]
name = "anstream"
version = "0.3.2"
version = "0.6.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163"
checksum = "d96bd03f33fe50a863e394ee9718a706f988b9079b20c3784fb726e7678b62fb"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is-terminal",
"utf8parse",
]
@ -113,33 +112,33 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
dependencies = [
"windows-sys 0.52.0",
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "1.0.2"
version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c677ab05e09154296dd37acecd46420c17b9713e8366facafa8fc0885167cf4c"
checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
dependencies = [
"anstyle",
"windows-sys 0.48.0",
"windows-sys",
]
[[package]]
name = "anyhow"
version = "1.0.75"
version = "1.0.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6"
checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1"
dependencies = [
"backtrace",
]
[[package]]
name = "ariadne"
version = "0.3.0"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72fe02fc62033df9ba41cba57ee19acf5e742511a140c7dbc3a873e19a19a1bd"
checksum = "dd002a6223f12c7a95cdd4b1cb3a0149d22d37f7a9ecdb2cb691a071fe236c29"
dependencies = [
"unicode-width",
"yansi",
@ -213,16 +212,16 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.31"
version = "0.4.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38"
checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b"
dependencies = [
"android-tzdata",
"iana-time-zone",
"js-sys",
"num-traits",
"wasm-bindgen",
"windows-targets 0.48.5",
"windows-targets 0.52.0",
]
[[package]]
@ -368,7 +367,7 @@ dependencies = [
"proc-macro2",
"quote",
"scratch",
"syn 2.0.41",
"syn 2.0.52",
]
[[package]]
@ -385,7 +384,7 @@ checksum = "5c6888cd161769d65134846d4d4981d5a6654307cc46ec83fb917e530aea5f84"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
]
[[package]]
@ -509,18 +508,14 @@ dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
]
[[package]]
name = "errno"
version = "0.3.8"
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245"
dependencies = [
"libc",
"windows-sys 0.52.0",
]
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "fnv"
@ -570,12 +565,6 @@ version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "hermit-abi"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7"
[[package]]
name = "iana-time-zone"
version = "0.1.58"
@ -606,23 +595,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "is-terminal"
version = "0.4.9"
name = "indexmap"
version = "2.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4"
dependencies = [
"hermit-abi",
"rustix",
"windows-sys 0.48.0",
]
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
"equivalent",
"hashbrown",
]
[[package]]
@ -681,12 +660,6 @@ dependencies = [
"cc",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456"
[[package]]
name = "log"
version = "0.4.20"
@ -805,45 +778,35 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "proc-macro2"
version = "1.0.70"
version = "1.0.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae"
dependencies = [
"unicode-ident",
]
[[package]]
name = "prql-ast"
version = "0.9.5"
name = "prqlc"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9d91522f9f16d055409b9ffec55693a96e3424fe5d8e7c8331adcf6d7ee363a"
dependencies = [
"enum-as-inner",
"semver",
"serde",
"strum",
]
[[package]]
name = "prql-compiler"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4d56865532fcf1abaa31fbb6da6fd9e90edc441c5c78bfe2870ee75187c7a3c"
checksum = "4beb05b6b71ce096fa56d73006ab1c42a8d11bf190d193fa511a134f7730ec43"
dependencies = [
"anstream",
"anyhow",
"ariadne",
"chrono",
"csv",
"enum-as-inner",
"itertools 0.11.0",
"itertools",
"log",
"once_cell",
"prql-ast",
"prql-parser",
"prqlc-ast",
"prqlc-parser",
"regex",
"semver",
"serde",
"serde_json",
"serde_yaml",
"sqlformat",
"sqlparser",
"strum",
@ -851,15 +814,29 @@ dependencies = [
]
[[package]]
name = "prql-parser"
version = "0.9.5"
name = "prqlc-ast"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9360352e413390cfd26345f49279622b87581a3b748340d3f42d4d616c2a1ec1"
checksum = "c98923b046bc48046e3846b14a5fde5a059f681c7c367bd0ab96ebd3ecc33a71"
dependencies = [
"anyhow",
"enum-as-inner",
"semver",
"serde",
"strum",
]
[[package]]
name = "prqlc-parser"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "855ad9aba599ef608efc88a30ebd731155997d9bbe780639eb175de060b6cddc"
dependencies = [
"chumsky",
"itertools 0.11.0",
"prql-ast",
"itertools",
"prqlc-ast",
"semver",
"stacker",
]
[[package]]
@ -873,9 +850,9 @@ dependencies = [
[[package]]
name = "quote"
version = "1.0.33"
version = "1.0.35"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
dependencies = [
"proc-macro2",
]
@ -922,9 +899,9 @@ dependencies = [
[[package]]
name = "regex"
version = "1.10.2"
version = "1.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343"
checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
dependencies = [
"aho-corasick",
"memchr",
@ -934,9 +911,9 @@ dependencies = [
[[package]]
name = "regex-automata"
version = "0.4.3"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f"
checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd"
dependencies = [
"aho-corasick",
"memchr",
@ -955,19 +932,6 @@ version = "0.1.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76"
[[package]]
name = "rustix"
version = "0.38.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316"
dependencies = [
"bitflags 2.4.1",
"errno",
"libc",
"linux-raw-sys",
"windows-sys 0.52.0",
]
[[package]]
name = "rustversion"
version = "1.0.14"
@ -988,44 +952,57 @@ checksum = "a3cf7c11c38cb994f3d40e8a8cde3bbd1f72a435e4c49e85d6553d8312306152"
[[package]]
name = "semver"
version = "1.0.20"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "836fa6a3e1e547f9a2c4040802ec865b5d85f4014efe00555d7090a3dcaa1090"
checksum = "92d43fe69e652f3df9bdc2b85b2854a0825b86e4fb76bc44d945137d053639ca"
dependencies = [
"serde",
]
[[package]]
name = "serde"
version = "1.0.193"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89"
checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.193"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
]
[[package]]
name = "serde_json"
version = "1.0.108"
version = "1.0.114"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d1c7e3eac408d115102c4c24ad393e0821bb3a5df4d506a80f85f7a742a526b"
checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "serde_yaml"
version = "0.9.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd075d994154d4a774f95b51fb96bdc2832b0ea48425c92546073816cda1f2f"
dependencies = [
"indexmap",
"itoa",
"ryu",
"serde",
"unsafe-libyaml",
]
[[package]]
name = "skim"
version = "0.10.4"
@ -1057,16 +1034,16 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c"
dependencies = [
"itertools 0.12.0",
"itertools",
"nom",
"unicode_categories",
]
[[package]]
name = "sqlparser"
version = "0.37.0"
version = "0.43.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37ae05a8250b968a3f7db93155a84d68b2e6cea1583949af5ca5b5170c76c075"
checksum = "f95c4bae5aba7cd30bd506f7140026ade63cff5afd778af8854026f9606bf5d4"
dependencies = [
"log",
"serde",
@ -1093,24 +1070,24 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strum"
version = "0.25.0"
version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
checksum = "723b93e8addf9aa965ebe2d11da6d7540fa2283fcea14b3371ff055f7ba13f5f"
dependencies = [
"strum_macros",
]
[[package]]
name = "strum_macros"
version = "0.25.3"
version = "0.26.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0"
checksum = "7a3417fc93d76740d974a01654a09777cb500428cc874ca9f45edfe0c4d4cd18"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn 2.0.41",
"syn 2.0.52",
]
[[package]]
@ -1126,9 +1103,9 @@ dependencies = [
[[package]]
name = "syn"
version = "2.0.41"
version = "2.0.52"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269"
checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07"
dependencies = [
"proc-macro2",
"quote",
@ -1172,7 +1149,7 @@ checksum = "01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
]
[[package]]
@ -1244,6 +1221,12 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e"
[[package]]
name = "unsafe-libyaml"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab4c90930b95a82d00dc9e9ac071b4991924390d46cbd0dfe566148667605e4b"
[[package]]
name = "utf8parse"
version = "0.2.1"
@ -1304,7 +1287,7 @@ dependencies = [
"once_cell",
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
"wasm-bindgen-shared",
]
@ -1326,7 +1309,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
@ -1377,15 +1360,6 @@ dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets 0.48.5",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
@ -1532,5 +1506,5 @@ checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.41",
"syn 2.0.52",
]

View File

@ -3,10 +3,8 @@ edition = "2021"
name = "_ch_rust_prql"
version = "0.1.0"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
prql-compiler = "0.9.3"
prqlc = {version = "0.11.3", default-features = false}
serde_json = "1.0"
[lib]

View File

@ -1,8 +1,8 @@
use prql_compiler::sql::Dialect;
use prql_compiler::{Options, Target};
use prqlc::sql::Dialect;
use prqlc::{Options, Target};
use std::ffi::{c_char, CString};
use std::slice;
use std::panic;
use std::slice;
fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) {
assert!(!out_size.is_null());
@ -14,39 +14,36 @@ fn set_output(result: String, out: *mut *mut u8, out_size: *mut u64) {
*out_ptr = CString::new(result).unwrap().into_raw() as *mut u8;
}
/// Converts a PRQL query from a raw C string to SQL, returning an error code if the conversion fails.
pub unsafe extern "C" fn prql_to_sql_impl(
query: *const u8,
size: u64,
out: *mut *mut u8,
out_size: *mut u64,
) -> i64 {
let query_vec = unsafe { slice::from_raw_parts(query, size.try_into().unwrap()) }.to_vec();
let maybe_prql_query = String::from_utf8(query_vec);
if maybe_prql_query.is_err() {
let query_vec = slice::from_raw_parts(query, size.try_into().unwrap()).to_vec();
let Ok(query_str) = String::from_utf8(query_vec) else {
set_output(
String::from("The PRQL query must be UTF-8 encoded!"),
"The PRQL query must be UTF-8 encoded!".to_string(),
out,
out_size,
);
return 1;
}
let prql_query = maybe_prql_query.unwrap();
let opts = &Options {
};
let opts = Options {
format: true,
target: Target::Sql(Some(Dialect::ClickHouse)),
signature_comment: false,
color: false,
};
let (is_err, res) = match prql_compiler::compile(&prql_query, &opts) {
Ok(sql_str) => (false, sql_str),
Err(err) => (true, err.to_string()),
};
set_output(res, out, out_size);
match is_err {
true => 1,
false => 0,
if let Ok(sql_str) = prqlc::compile(&query_str, &opts) {
set_output(sql_str, out, out_size);
0
} else {
set_output("PRQL compilation failed!".to_string(), out, out_size);
1
}
}

View File

@ -1,7 +1,7 @@
use skim::prelude::*;
use term::terminfo::TermInfo;
use cxx::{CxxString, CxxVector};
use skim::prelude::*;
use std::panic;
use term::terminfo::TermInfo;
#[cxx::bridge]
mod ffi {
@ -16,7 +16,7 @@ struct Item {
}
impl Item {
fn new(text: String) -> Self {
return Self{
Self {
// Text that will be printed by skim, and will be used for matching.
//
// Text that will be shown should not contains new lines since in this case skim may
@ -24,16 +24,16 @@ impl Item {
text_no_newlines: text.replace("\n", " "),
// This will be used when the match had been selected.
orig_text: text,
};
}
}
}
impl SkimItem for Item {
fn text(&self) -> Cow<str> {
return Cow::Borrowed(&self.text_no_newlines);
Cow::Borrowed(&self.text_no_newlines)
}
fn output(&self) -> Cow<str> {
return Cow::Borrowed(&self.orig_text);
Cow::Borrowed(&self.orig_text)
}
}
@ -88,14 +88,11 @@ fn skim_impl(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String,
if output.selected_items.is_empty() {
return Err("No items had been selected".to_string());
}
return Ok(output.selected_items[0].output().to_string());
Ok(output.selected_items[0].output().to_string())
}
fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, String> {
let ret = panic::catch_unwind(|| {
return skim_impl(prefix, words);
});
return match ret {
match panic::catch_unwind(|| skim_impl(prefix, words)) {
Err(err) => {
let e = if let Some(s) = err.downcast_ref::<String>() {
format!("{}", s)
@ -105,7 +102,7 @@ fn skim(prefix: &CxxString, words: &CxxVector<CxxString>) -> Result<String, Stri
format!("Unknown panic type: {:?}", err.type_id())
};
Err(format!("Rust panic: {:?}", e))
},
}
Ok(res) => res,
}
}

View File

@ -161,7 +161,9 @@ enum class AccessType
M(SYSTEM_DROP_QUERY_CACHE, "SYSTEM DROP QUERY, DROP QUERY CACHE, DROP QUERY", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_COMPILED_EXPRESSION_CACHE, "SYSTEM DROP COMPILED EXPRESSION, DROP COMPILED EXPRESSION CACHE, DROP COMPILED EXPRESSIONS", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_FILESYSTEM_CACHE, "SYSTEM DROP FILESYSTEM CACHE, DROP FILESYSTEM CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_DISTRIBUTED_CACHE, "SYSTEM DROP DISTRIBUTED CACHE, DROP DISTRIBUTED CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_SYNC_FILESYSTEM_CACHE, "SYSTEM REPAIR FILESYSTEM CACHE, REPAIR FILESYSTEM CACHE, SYNC FILESYSTEM CACHE", GLOBAL, SYSTEM) \
M(SYSTEM_DROP_PAGE_CACHE, "SYSTEM DROP PAGE CACHE, DROP PAGE CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_SCHEMA_CACHE, "SYSTEM DROP SCHEMA CACHE, DROP SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_FORMAT_SCHEMA_CACHE, "SYSTEM DROP FORMAT SCHEMA CACHE, DROP FORMAT SCHEMA CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
M(SYSTEM_DROP_S3_CLIENT_CACHE, "SYSTEM DROP S3 CLIENT, DROP S3 CLIENT CACHE", GLOBAL, SYSTEM_DROP_CACHE) \
@ -186,6 +188,7 @@ enum class AccessType
M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP REPLICATED SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \
M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \
M(SYSTEM_REPLICATION_QUEUES, "SYSTEM STOP REPLICATION QUEUES, SYSTEM START REPLICATION QUEUES, STOP REPLICATION QUEUES, START REPLICATION QUEUES", TABLE, SYSTEM) \
M(SYSTEM_VIRTUAL_PARTS_UPDATE, "SYSTEM STOP VIRTUAL PARTS UPDATE, SYSTEM START VIRTUAL PARTS UPDATE, STOP VIRTUAL PARTS UPDATE, START VIRTUAL PARTS UPDATE", TABLE, SYSTEM) \
M(SYSTEM_DROP_REPLICA, "DROP REPLICA", TABLE, SYSTEM) \
M(SYSTEM_SYNC_REPLICA, "SYNC REPLICA", TABLE, SYSTEM) \
M(SYSTEM_REPLICA_READINESS, "SYSTEM REPLICA READY, SYSTEM REPLICA UNREADY", GLOBAL, SYSTEM) \

View File

@ -51,7 +51,7 @@ TEST(AccessRights, Union)
"CREATE DICTIONARY, DROP DATABASE, DROP TABLE, DROP VIEW, DROP DICTIONARY, UNDROP TABLE, "
"TRUNCATE, OPTIMIZE, BACKUP, CREATE ROW POLICY, ALTER ROW POLICY, DROP ROW POLICY, "
"SHOW ROW POLICIES, SYSTEM MERGES, SYSTEM TTL MERGES, SYSTEM FETCHES, "
"SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, "
"SYSTEM MOVES, SYSTEM PULLING REPLICATION LOG, SYSTEM CLEANUP, SYSTEM VIEWS, SYSTEM SENDS, SYSTEM REPLICATION QUEUES, SYSTEM VIRTUAL PARTS UPDATE, "
"SYSTEM DROP REPLICA, SYSTEM SYNC REPLICA, SYSTEM RESTART REPLICA, "
"SYSTEM RESTORE REPLICA, SYSTEM WAIT LOADING PARTS, SYSTEM SYNC DATABASE REPLICA, SYSTEM FLUSH DISTRIBUTED, dictGet ON db1.*, "
"GRANT SET DEFINER ON db1, GRANT NAMED COLLECTION ADMIN ON db1");

View File

@ -5,12 +5,14 @@
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeIPv4andIPv6.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h>
#include <IO/ReadHelpersArena.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnArray.h>
@ -31,6 +33,7 @@ namespace ErrorCodes
{
extern const int ARGUMENT_OUT_OF_BOUND;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
@ -58,23 +61,53 @@ protected:
using State = AggregateFunctionTopKData<T>;
UInt64 threshold;
UInt64 reserved;
bool include_counts;
bool is_approx_top_k;
public:
AggregateFunctionTopK(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionTopKData<T>, AggregateFunctionTopK<T, is_weighted>>(argument_types_, params, createResultType(argument_types_))
, threshold(threshold_), reserved(load_factor * threshold)
AggregateFunctionTopK(UInt64 threshold_, UInt64 reserved_, bool include_counts_, bool is_approx_top_k_, const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionTopKData<T>, AggregateFunctionTopK<T, is_weighted>>(argument_types_, params, createResultType(argument_types_, include_counts_))
, threshold(threshold_), reserved(reserved_), include_counts(include_counts_), is_approx_top_k(is_approx_top_k_)
{}
AggregateFunctionTopK(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params, const DataTypePtr & result_type_)
AggregateFunctionTopK(UInt64 threshold_, UInt64 reserved_, bool include_counts_, bool is_approx_top_k_, const DataTypes & argument_types_, const Array & params, const DataTypePtr & result_type_)
: IAggregateFunctionDataHelper<AggregateFunctionTopKData<T>, AggregateFunctionTopK<T, is_weighted>>(argument_types_, params, result_type_)
, threshold(threshold_), reserved(load_factor * threshold)
, threshold(threshold_), reserved(reserved_), include_counts(include_counts_), is_approx_top_k(is_approx_top_k_)
{}
String getName() const override { return is_weighted ? "topKWeighted" : "topK"; }
static DataTypePtr createResultType(const DataTypes & argument_types_)
String getName() const override
{
return std::make_shared<DataTypeArray>(argument_types_[0]);
if (is_approx_top_k)
return is_weighted ? "approx_top_sum" : "approx_top_k";
else
return is_weighted ? "topKWeighted" : "topK";
}
static DataTypePtr createResultType(const DataTypes & argument_types_, bool include_counts_)
{
if (include_counts_)
{
DataTypes types
{
argument_types_[0],
std::make_shared<DataTypeUInt64>(),
std::make_shared<DataTypeUInt64>(),
};
Strings names
{
"item",
"count",
"error",
};
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(
std::move(types),
std::move(names)
));
}
else
return std::make_shared<DataTypeArray>(argument_types_[0]);
}
bool allocatesMemoryInArena() const override { return false; }
@ -122,13 +155,40 @@ public:
offsets_to.push_back(offsets_to.back() + size);
typename ColumnVector<T>::Container & data_to = assert_cast<ColumnVector<T> &>(arr_to.getData()).getData();
size_t old_size = data_to.size();
data_to.resize(old_size + size);
IColumn & data_to = arr_to.getData();
size_t i = 0;
for (auto it = result_vec.begin(); it != result_vec.end(); ++it, ++i)
data_to[old_size + i] = it->key;
if (include_counts)
{
auto & column_tuple = assert_cast<ColumnTuple &>(data_to);
auto & column_key = assert_cast<ColumnVector<T> &>(column_tuple.getColumn(0)).getData();
auto & column_count = assert_cast<ColumnVector<UInt64> &>(column_tuple.getColumn(1)).getData();
auto & column_error = assert_cast<ColumnVector<UInt64> &>(column_tuple.getColumn(2)).getData();
size_t old_size = column_key.size();
column_key.resize(old_size + size);
column_count.resize(old_size + size);
column_error.resize(old_size + size);
size_t i = 0;
for (auto it = result_vec.begin(); it != result_vec.end(); ++it, ++i)
{
column_key[old_size + i] = it->key;
column_count[old_size + i] = it->count;
column_error[old_size + i] = it->error;
}
} else
{
auto & column_key = assert_cast<ColumnVector<T> &>(data_to).getData();
size_t old_size = column_key.size();
column_key.resize(old_size + size);
size_t i = 0;
for (auto it = result_vec.begin(); it != result_vec.end(); ++it, ++i)
{
column_key[old_size + i] = it->key;
}
}
}
};
@ -153,18 +213,50 @@ private:
UInt64 threshold;
UInt64 reserved;
bool include_counts;
bool is_approx_top_k;
public:
AggregateFunctionTopKGeneric(
UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column, is_weighted>>(argument_types_, params, createResultType(argument_types_))
, threshold(threshold_), reserved(load_factor * threshold) {}
UInt64 threshold_, UInt64 reserved_, bool include_counts_, bool is_approx_top_k_, const DataTypes & argument_types_, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column, is_weighted>>(argument_types_, params, createResultType(argument_types_, include_counts_))
, threshold(threshold_), reserved(reserved_), include_counts(include_counts_), is_approx_top_k(is_approx_top_k_) {}
String getName() const override { return is_weighted ? "topKWeighted" : "topK"; }
static DataTypePtr createResultType(const DataTypes & argument_types_)
String getName() const override
{
return std::make_shared<DataTypeArray>(argument_types_[0]);
if (is_approx_top_k)
return is_weighted ? "approx_top_sum" : "approx_top_k";
else
return is_weighted ? "topKWeighted" : "topK";
}
static DataTypePtr createResultType(const DataTypes & argument_types_, bool include_counts_)
{
if (include_counts_)
{
DataTypes types
{
argument_types_[0],
std::make_shared<DataTypeUInt64>(),
std::make_shared<DataTypeUInt64>(),
};
Strings names
{
"item",
"count",
"error",
};
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(
std::move(types),
std::move(names)
));
} else
{
return std::make_shared<DataTypeArray>(argument_types_[0]);
}
}
bool allocatesMemoryInArena() const override
@ -247,13 +339,33 @@ public:
{
ColumnArray & arr_to = assert_cast<ColumnArray &>(to);
ColumnArray::Offsets & offsets_to = arr_to.getOffsets();
const typename State::Set & set = this->data(place).value;
auto result_vec = set.topK(threshold);
size_t size = result_vec.size();
offsets_to.push_back(offsets_to.back() + size);
IColumn & data_to = arr_to.getData();
auto result_vec = this->data(place).value.topK(threshold);
offsets_to.push_back(offsets_to.back() + result_vec.size());
for (auto & elem : result_vec)
deserializeAndInsert<is_plain_column>(elem.key, data_to);
if (include_counts)
{
auto & column_tuple = assert_cast<ColumnTuple &>(data_to);
IColumn & column_key = column_tuple.getColumn(0);
IColumn & column_count = column_tuple.getColumn(1);
IColumn & column_error = column_tuple.getColumn(2);
for (auto &elem : result_vec)
{
column_count.insert(elem.count);
column_error.insert(elem.error);
deserializeAndInsert<is_plain_column>(elem.key, column_key);
}
} else
{
for (auto & elem : result_vec)
{
deserializeAndInsert<is_plain_column>(elem.key, data_to);
}
}
}
};
@ -265,13 +377,14 @@ class AggregateFunctionTopKDate : public AggregateFunctionTopK<DataTypeDate::Fie
public:
using AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>::AggregateFunctionTopK;
AggregateFunctionTopKDate(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params)
AggregateFunctionTopKDate(UInt64 threshold_, UInt64 reserved_, bool include_counts_, bool is_approx_top_k_, const DataTypes & argument_types_, const Array & params)
: AggregateFunctionTopK<DataTypeDate::FieldType, is_weighted>(
threshold_,
load_factor,
reserved_,
include_counts_,
is_approx_top_k_,
argument_types_,
params,
std::make_shared<DataTypeArray>(std::make_shared<DataTypeDate>()))
params)
{}
};
@ -281,13 +394,14 @@ class AggregateFunctionTopKDateTime : public AggregateFunctionTopK<DataTypeDateT
public:
using AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>::AggregateFunctionTopK;
AggregateFunctionTopKDateTime(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params)
AggregateFunctionTopKDateTime(UInt64 threshold_, UInt64 reserved_, bool include_counts_, bool is_approx_top_k_, const DataTypes & argument_types_, const Array & params)
: AggregateFunctionTopK<DataTypeDateTime::FieldType, is_weighted>(
threshold_,
load_factor,
reserved_,
include_counts_,
is_approx_top_k_,
argument_types_,
params,
std::make_shared<DataTypeArray>(std::make_shared<DataTypeDateTime>()))
params)
{}
};
@ -297,40 +411,41 @@ class AggregateFunctionTopKIPv4 : public AggregateFunctionTopK<DataTypeIPv4::Fie
public:
using AggregateFunctionTopK<DataTypeIPv4::FieldType, is_weighted>::AggregateFunctionTopK;
AggregateFunctionTopKIPv4(UInt64 threshold_, UInt64 load_factor, const DataTypes & argument_types_, const Array & params)
AggregateFunctionTopKIPv4(UInt64 threshold_, UInt64 reserved_, bool include_counts_, bool is_approx_top_k_, const DataTypes & argument_types_, const Array & params)
: AggregateFunctionTopK<DataTypeIPv4::FieldType, is_weighted>(
threshold_,
load_factor,
reserved_,
include_counts_,
is_approx_top_k_,
argument_types_,
params,
std::make_shared<DataTypeArray>(std::make_shared<DataTypeIPv4>()))
params)
{}
};
template <bool is_weighted>
IAggregateFunction * createWithExtraTypes(const DataTypes & argument_types, UInt64 threshold, UInt64 load_factor, const Array & params)
IAggregateFunction * createWithExtraTypes(const DataTypes & argument_types, UInt64 threshold, UInt64 reserved, bool include_counts, bool is_approx_top_k, const Array & params)
{
if (argument_types.empty())
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Got empty arguments list");
WhichDataType which(argument_types[0]);
if (which.idx == TypeIndex::Date)
return new AggregateFunctionTopKDate<is_weighted>(threshold, load_factor, argument_types, params);
return new AggregateFunctionTopKDate<is_weighted>(threshold, reserved, include_counts, is_approx_top_k, argument_types, params);
if (which.idx == TypeIndex::DateTime)
return new AggregateFunctionTopKDateTime<is_weighted>(threshold, load_factor, argument_types, params);
return new AggregateFunctionTopKDateTime<is_weighted>(threshold, reserved, include_counts, is_approx_top_k, argument_types, params);
if (which.idx == TypeIndex::IPv4)
return new AggregateFunctionTopKIPv4<is_weighted>(threshold, load_factor, argument_types, params);
return new AggregateFunctionTopKIPv4<is_weighted>(threshold, reserved, include_counts, is_approx_top_k, argument_types, params);
/// Check that we can use plain version of AggregateFunctionTopKGeneric
if (argument_types[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
return new AggregateFunctionTopKGeneric<true, is_weighted>(threshold, load_factor, argument_types, params);
return new AggregateFunctionTopKGeneric<true, is_weighted>(threshold, reserved, include_counts, is_approx_top_k, argument_types, params);
else
return new AggregateFunctionTopKGeneric<false, is_weighted>(threshold, load_factor, argument_types, params);
return new AggregateFunctionTopKGeneric<false, is_weighted>(threshold, reserved, include_counts, is_approx_top_k, argument_types, params);
}
template <bool is_weighted>
template <bool is_weighted, bool is_approx_top_k>
AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
if (!is_weighted)
@ -346,40 +461,65 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
UInt64 threshold = 10; /// default values
UInt64 load_factor = 3;
bool include_counts = is_approx_top_k;
UInt64 reserved = threshold * load_factor;
if (!params.empty())
{
if (params.size() > 2)
if (params.size() > 3)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
"Aggregate function '{}' requires two parameters or less", name);
if (params.size() == 2)
{
load_factor = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[1]);
if (load_factor < 1)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too small parameter 'load_factor' for aggregate function '{}' (got {}, minimum is 1)", name, load_factor);
}
"Aggregate function '{}' requires three parameters or less", name);
threshold = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[0]);
if (threshold > DB::TOP_K_MAX_SIZE || load_factor > DB::TOP_K_MAX_SIZE || threshold * load_factor > DB::TOP_K_MAX_SIZE)
throw Exception(
ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too large parameter(s) for aggregate function '{}' (maximum is {})",
name,
toString(DB::TOP_K_MAX_SIZE));
if (params.size() >= 2)
{
if (is_approx_top_k)
{
reserved = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[1]);
if (threshold == 0)
if (reserved < 1)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too small parameter 'reserved' for aggregate function '{}' (got {}, minimum is 1)", name, reserved);
} else
{
load_factor = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[1]);
if (load_factor < 1)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too small parameter 'load_factor' for aggregate function '{}' (got {}, minimum is 1)", name, load_factor);
}
}
if (params.size() == 3)
{
String option = params.at(2).safeGet<String>();
if (option == "counts")
include_counts = true;
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} doesn't support a parameter: {}", name, option);
}
if (!is_approx_top_k)
{
reserved = threshold * load_factor;
}
if (reserved > DB::TOP_K_MAX_SIZE || load_factor > DB::TOP_K_MAX_SIZE || threshold > DB::TOP_K_MAX_SIZE)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND,
"Too large parameter(s) for aggregate function '{}' (maximum is {})", name, toString(TOP_K_MAX_SIZE));
if (threshold == 0 || reserved == 0)
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Parameter 0 is illegal for aggregate function '{}'", name);
}
AggregateFunctionPtr res(createWithNumericType<AggregateFunctionTopK, is_weighted>(
*argument_types[0], threshold, load_factor, argument_types, params));
*argument_types[0], threshold, reserved, include_counts, is_approx_top_k, argument_types, params));
if (!res)
res = AggregateFunctionPtr(createWithExtraTypes<is_weighted>(argument_types, threshold, load_factor, params));
res = AggregateFunctionPtr(createWithExtraTypes<is_weighted>(argument_types, threshold, reserved, include_counts, is_approx_top_k, params));
if (!res)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
@ -393,8 +533,11 @@ void registerAggregateFunctionTopK(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("topK", { createAggregateFunctionTopK<false>, properties });
factory.registerFunction("topKWeighted", { createAggregateFunctionTopK<true>, properties });
factory.registerFunction("topK", { createAggregateFunctionTopK<false, false>, properties });
factory.registerFunction("topKWeighted", { createAggregateFunctionTopK<true, false>, properties });
factory.registerFunction("approx_top_k", { createAggregateFunctionTopK<false, true>, properties }, AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("approx_top_sum", { createAggregateFunctionTopK<true, true>, properties }, AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("approx_top_count", "approx_top_k", AggregateFunctionFactory::CaseInsensitive);
}
}

View File

@ -8,10 +8,8 @@
# include <unicode/ucol.h>
# include <unicode/unistr.h>
#else
# if defined(__clang__)
# pragma clang diagnostic ignored "-Wunused-private-field"
# pragma clang diagnostic ignored "-Wmissing-noreturn"
# endif
# pragma clang diagnostic ignored "-Wunused-private-field"
# pragma clang diagnostic ignored "-Wmissing-noreturn"
#endif
#include <Common/Exception.h>

View File

@ -1,10 +1,7 @@
#include <Columns/ColumnAggregateFunction.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/MaskOperations.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromArena.h>
#include <IO/WriteBufferFromString.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <Common/AlignedBuffer.h>
#include <Common/Arena.h>
#include <Common/FieldVisitorToString.h>
@ -14,6 +11,10 @@
#include <Common/assert_cast.h>
#include <Common/iota.h>
#include <Common/typeid_cast.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromArena.h>
#include <IO/WriteBufferFromString.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
namespace DB
@ -542,7 +543,7 @@ void ColumnAggregateFunction::insertDefault()
pushBackAndCreateState(data, arena, func.get());
}
StringRef ColumnAggregateFunction::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin, const UInt8 *) const
StringRef ColumnAggregateFunction::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin) const
{
WriteBufferFromArena out(arena, begin);
func->serialize(data[n], out, version);
@ -650,11 +651,6 @@ void ColumnAggregateFunction::getPermutation(PermutationSortDirection /*directio
void ColumnAggregateFunction::updatePermutation(PermutationSortDirection, PermutationSortStability,
size_t, int, Permutation &, EqualRanges&) const {}
void ColumnAggregateFunction::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
void ColumnAggregateFunction::getExtremes(Field & min, Field & max) const
{
/// Place serialized default values into min/max.
@ -690,7 +686,7 @@ ColumnAggregateFunction::MutablePtr ColumnAggregateFunction::createView() const
}
ColumnAggregateFunction::ColumnAggregateFunction(const ColumnAggregateFunction & src_)
: COWHelper<IColumn, ColumnAggregateFunction>(src_),
: COWHelper<IColumnHelper<ColumnAggregateFunction>, ColumnAggregateFunction>(src_),
foreign_arenas(concatArenas(src_.foreign_arenas, src_.my_arena)),
func(src_.func), src(src_.getPtr()), data(src_.data.begin(), src_.data.end())
{

View File

@ -51,13 +51,13 @@ using ConstArenas = std::vector<ConstArenaPtr>;
* specifying which individual values should be destroyed and which ones should not.
* Clearly, this method would have a substantially non-zero price.
*/
class ColumnAggregateFunction final : public COWHelper<IColumn, ColumnAggregateFunction>
class ColumnAggregateFunction final : public COWHelper<IColumnHelper<ColumnAggregateFunction>, ColumnAggregateFunction>
{
public:
using Container = PaddedPODArray<AggregateDataPtr>;
private:
friend class COWHelper<IColumn, ColumnAggregateFunction>;
friend class COWHelper<IColumnHelper<ColumnAggregateFunction>, ColumnAggregateFunction>;
/// Arenas used by function states that are created elsewhere. We own these
/// arenas in the sense of extending their lifetime, but do not modify them.
@ -164,7 +164,7 @@ public:
void insertDefault() override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
const char * deserializeAndInsertFromArena(const char * src_arena) override;
@ -203,8 +203,6 @@ public:
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
int compareAt(size_t, size_t, const IColumn &, int) const override
{
return 0;

View File

@ -8,7 +8,6 @@
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/MaskOperations.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <Common/Exception.h>
#include <Common/Arena.h>
#include <Common/SipHash.h>
@ -205,7 +204,7 @@ void ColumnArray::insertData(const char * pos, size_t length)
}
StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
size_t array_size = sizeAt(n);
size_t offset = offsetAt(n);
@ -226,6 +225,19 @@ StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char con
}
char * ColumnArray::serializeValueIntoMemory(size_t n, char * memory) const
{
size_t array_size = sizeAt(n);
size_t offset = offsetAt(n);
memcpy(memory, &array_size, sizeof(array_size));
memory += sizeof(array_size);
for (size_t i = 0; i < array_size; ++i)
memory = getData().serializeValueIntoMemory(offset + i, memory);
return memory;
}
const char * ColumnArray::deserializeAndInsertFromArena(const char * pos)
{
size_t array_size = unalignedLoad<size_t>(pos);
@ -390,19 +402,6 @@ int ColumnArray::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_
return compareAtImpl(n, m, rhs_, nan_direction_hint, &collator);
}
void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnArray>(assert_cast<const ColumnArray &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
bool ColumnArray::hasEqualValues() const
{
return hasEqualValuesImpl<ColumnArray>();
}
struct ColumnArray::ComparatorBase
{
const ColumnArray & parent;
@ -988,22 +987,6 @@ ColumnPtr ColumnArray::compress() const
});
}
double ColumnArray::getRatioOfDefaultRows(double sample_ratio) const
{
return getRatioOfDefaultRowsImpl<ColumnArray>(sample_ratio);
}
UInt64 ColumnArray::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnArray>();
}
void ColumnArray::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnArray>(indices, from, limit);
}
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
{
if (replicate_offsets.empty())
@ -1298,11 +1281,6 @@ ColumnPtr ColumnArray::replicateTuple(const Offsets & replicate_offsets) const
assert_cast<const ColumnArray &>(*temporary_arrays.front()).getOffsetsPtr());
}
void ColumnArray::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
size_t ColumnArray::getNumberOfDimensions() const
{
const auto * nested_array = checkAndGetColumn<ColumnArray>(*data);

View File

@ -15,10 +15,10 @@ namespace DB
* In memory, it is represented as one column of a nested type, whose size is equal to the sum of the sizes of all arrays,
* and as an array of offsets in it, which allows you to get each element.
*/
class ColumnArray final : public COWHelper<IColumn, ColumnArray>
class ColumnArray final : public COWHelper<IColumnHelper<ColumnArray>, ColumnArray>
{
private:
friend class COWHelper<IColumn, ColumnArray>;
friend class COWHelper<IColumnHelper<ColumnArray>, ColumnArray>;
/** Create an array column with specified values and offsets. */
ColumnArray(MutableColumnPtr && nested_column, MutableColumnPtr && offsets_column);
@ -48,7 +48,7 @@ public:
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWHelper<IColumn, ColumnArray>;
using Base = COWHelper<IColumnHelper<ColumnArray>, ColumnArray>;
static Ptr create(const ColumnPtr & nested_column, const ColumnPtr & offsets_column)
{
@ -77,7 +77,8 @@ public:
StringRef getDataAt(size_t n) const override;
bool isDefaultAt(size_t n) const override;
void insertData(const char * pos, size_t length) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
@ -95,11 +96,7 @@ public:
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override;
bool hasEqualValues() const override;
void getPermutation(PermutationSortDirection direction, PermutationSortStability stability,
size_t limit, int nan_direction_hint, Permutation & res) const override;
void updatePermutation(PermutationSortDirection direction, PermutationSortStability stability,
@ -148,13 +145,6 @@ public:
/// For example, `getDataInRange(0, size())` is the same as `getDataPtr()->clone()`.
MutableColumnPtr getDataInRange(size_t start, size_t length) const;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
{
return scatterImpl<ColumnArray>(num_columns, selector);
}
void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void forEachSubcolumn(MutableColumnCallback callback) override
@ -178,11 +168,6 @@ public:
return false;
}
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
void finalize() override { data->finalize(); }
bool isFinalized() const override { return data->isFinalized(); }

View File

@ -30,7 +30,7 @@ namespace ErrorCodes
*
* Also in-memory compression allows to keep more data in RAM.
*/
class ColumnCompressed : public COWHelper<IColumn, ColumnCompressed>
class ColumnCompressed : public COWHelper<IColumnHelper<ColumnCompressed>, ColumnCompressed>
{
public:
using Lazy = std::function<ColumnPtr()>;
@ -89,7 +89,8 @@ public:
void insertData(const char *, size_t) override { throwMustBeDecompressed(); }
void insertDefault() override { throwMustBeDecompressed(); }
void popBack(size_t) override { throwMustBeDecompressed(); }
StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override { throwMustBeDecompressed(); }
StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeDecompressed(); }
char * serializeValueIntoMemory(size_t, char *) const override { throwMustBeDecompressed(); }
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }

View File

@ -20,10 +20,10 @@ namespace ErrorCodes
/** ColumnConst contains another column with single element,
* but looks like a column with arbitrary amount of same elements.
*/
class ColumnConst final : public COWHelper<IColumn, ColumnConst>
class ColumnConst final : public COWHelper<IColumnHelper<ColumnConst>, ColumnConst>
{
private:
friend class COWHelper<IColumn, ColumnConst>;
friend class COWHelper<IColumnHelper<ColumnConst>, ColumnConst>;
WrappedPtr data;
size_t s;
@ -160,11 +160,16 @@ public:
s -= n;
}
StringRef serializeValueIntoArena(size_t, Arena & arena, char const *& begin, const UInt8 *) const override
StringRef serializeValueIntoArena(size_t, Arena & arena, char const *& begin) const override
{
return data->serializeValueIntoArena(0, arena, begin);
}
char * serializeValueIntoMemory(size_t, char * memory) const override
{
return data->serializeValueIntoMemory(0, memory);
}
const char * deserializeAndInsertFromArena(const char * pos) override
{
const auto * res = data->deserializeAndInsertFromArena(pos);

View File

@ -42,46 +42,6 @@ int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) c
return decimalLess<T>(b, a, other.scale, scale) ? 1 : (decimalLess<T>(a, b, scale, other.scale) ? -1 : 0);
}
template <is_decimal T>
void ColumnDecimal<T>::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return this->template doCompareColumn<ColumnDecimal<T>>(static_cast<const Self &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
template <is_decimal T>
bool ColumnDecimal<T>::hasEqualValues() const
{
return this->template hasEqualValuesImpl<ColumnDecimal<T>>();
}
template <is_decimal T>
StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
constexpr size_t null_bit_size = sizeof(UInt8);
StringRef res;
char * pos;
if (null_bit)
{
res.size = * null_bit ? null_bit_size : null_bit_size + sizeof(T);
pos = arena.allocContinue(res.size, begin);
res.data = pos;
memcpy(pos, null_bit, null_bit_size);
if (*null_bit) return res;
pos += null_bit_size;
}
else
{
res.size = sizeof(T);
pos = arena.allocContinue(res.size, begin);
res.data = pos;
}
memcpy(pos, &data[n], sizeof(T));
return res;
}
template <is_decimal T>
const char * ColumnDecimal<T>::deserializeAndInsertFromArena(const char * pos)
{
@ -470,12 +430,6 @@ ColumnPtr ColumnDecimal<T>::replicate(const IColumn::Offsets & offsets) const
return res;
}
template <is_decimal T>
void ColumnDecimal<T>::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
template <is_decimal T>
ColumnPtr ColumnDecimal<T>::compress() const
{

View File

@ -1,14 +1,12 @@
#pragma once
#include <cmath>
#include <base/sort.h>
#include <base/TypeName.h>
#include <Core/Field.h>
#include <Core/DecimalFunctions.h>
#include <Core/TypeId.h>
#include <Common/typeid_cast.h>
#include <Columns/ColumnVectorHelper.h>
#include <Columns/ColumnFixedSizeHelper.h>
#include <Columns/IColumn.h>
#include <Columns/IColumnImpl.h>
@ -18,11 +16,11 @@ namespace DB
/// A ColumnVector for Decimals
template <is_decimal T>
class ColumnDecimal final : public COWHelper<ColumnVectorHelper, ColumnDecimal<T>>
class ColumnDecimal final : public COWHelper<IColumnHelper<ColumnDecimal<T>, ColumnFixedSizeHelper>, ColumnDecimal<T>>
{
private:
using Self = ColumnDecimal;
friend class COWHelper<ColumnVectorHelper, Self>;
friend class COWHelper<IColumnHelper<Self, ColumnFixedSizeHelper>, Self>;
public:
using ValueType = T;
@ -82,17 +80,12 @@ public:
Float64 getFloat64(size_t n) const final { return DecimalUtils::convertTo<Float64>(data[n], scale); }
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
bool hasEqualValues() const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
@ -119,13 +112,6 @@ public:
ColumnPtr replicate(const IColumn::Offsets & offsets) const override;
void getExtremes(Field & min, Field & max) const override;
MutableColumns scatter(IColumn::ColumnIndex num_columns, const IColumn::Selector & selector) const override
{
return this->template scatterImpl<Self>(num_columns, selector);
}
void gather(ColumnGathererStream & gatherer_stream) override;
bool structureEquals(const IColumn & rhs) const override
{
if (auto rhs_concrete = typeid_cast<const ColumnDecimal<T> *>(&rhs))
@ -133,21 +119,6 @@ public:
return false;
}
double getRatioOfDefaultRows(double sample_ratio) const override
{
return this->template getRatioOfDefaultRowsImpl<Self>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return this->template getNumberOfDefaultRowsImpl<Self>();
}
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override
{
return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);
}
ColumnPtr compress() const override;
void insertValue(const T value) { data.push_back(value); }

View File

@ -7,7 +7,7 @@
namespace DB
{
/** Allows to access internal array of ColumnVector or ColumnFixedString without cast to concrete type.
/** Allows to access internal array of fixed-size column without cast to concrete type.
* We will inherit ColumnVector and ColumnFixedString from this class instead of IColumn.
* Assumes data layout of ColumnVector, ColumnFixedString and PODArray.
*
@ -22,7 +22,7 @@ namespace DB
* To allow functional tests to work under UBSan we have to separate some base class that will present the memory layout in explicit way,
* and we will do static_cast to this class.
*/
class ColumnVectorHelper : public IColumn
class ColumnFixedSizeHelper : public IColumn
{
public:
template <size_t ELEMENT_SIZE>

View File

@ -2,7 +2,6 @@
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <IO/WriteHelpers.h>
#include <Common/Arena.h>
#include <Common/HashTable/Hash.h>
@ -97,30 +96,6 @@ void ColumnFixedString::insertData(const char * pos, size_t length)
memset(chars.data() + old_size + length, 0, n - length);
}
StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
constexpr size_t null_bit_size = sizeof(UInt8);
StringRef res;
char * pos;
if (null_bit)
{
res.size = * null_bit ? null_bit_size : null_bit_size + n;
pos = arena.allocContinue(res.size, begin);
res.data = pos;
memcpy(pos, null_bit, null_bit_size);
if (*null_bit) return res;
pos += null_bit_size;
}
else
{
res.size = n;
pos = arena.allocContinue(res.size, begin);
res.data = pos;
}
memcpy(pos, &chars[n * index], n);
return res;
}
const char * ColumnFixedString::deserializeAndInsertFromArena(const char * pos)
{
size_t old_size = chars.size();
@ -375,11 +350,6 @@ ColumnPtr ColumnFixedString::replicate(const Offsets & offsets) const
return res;
}
void ColumnFixedString::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
void ColumnFixedString::getExtremes(Field & min, Field & max) const
{
min = String();

View File

@ -6,7 +6,7 @@
#include <Common/assert_cast.h>
#include <Columns/IColumn.h>
#include <Columns/IColumnImpl.h>
#include <Columns/ColumnVectorHelper.h>
#include <Columns/ColumnFixedSizeHelper.h>
#include <Core/Field.h>
@ -16,10 +16,10 @@ namespace DB
/** A column of values of "fixed-length string" type.
* If you insert a smaller string, it will be padded with zero bytes.
*/
class ColumnFixedString final : public COWHelper<ColumnVectorHelper, ColumnFixedString>
class ColumnFixedString final : public COWHelper<IColumnHelper<ColumnFixedString, ColumnFixedSizeHelper>, ColumnFixedString>
{
public:
friend class COWHelper<ColumnVectorHelper, ColumnFixedString>;
friend class COWHelper<IColumnHelper<ColumnFixedString, ColumnFixedSizeHelper>, ColumnFixedString>;
using Chars = PaddedPODArray<UInt8>;
@ -107,7 +107,7 @@ public:
chars.resize_fill(chars.size() + n);
}
virtual void insertManyDefaults(size_t length) override
void insertManyDefaults(size_t length) override
{
chars.resize_fill(chars.size() + n * length);
}
@ -117,8 +117,6 @@ public:
chars.resize_assume_reserved(chars.size() - n * elems);
}
StringRef serializeValueIntoArena(size_t index, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
@ -136,24 +134,6 @@ public:
return memcmpSmallAllowOverflow15(chars.data() + p1 * n, rhs.chars.data() + p2 * n, n);
}
void compareColumn(
const IColumn & rhs_,
size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes,
PaddedPODArray<Int8> & compare_results,
int direction,
int nan_direction_hint) const override
{
const ColumnFixedString & rhs = assert_cast<const ColumnFixedString &>(rhs_);
chassert(this->n == rhs.n);
return doCompareColumn<ColumnFixedString>(rhs, rhs_row_num, row_indexes, compare_results, direction, nan_direction_hint);
}
bool hasEqualValues() const override
{
return hasEqualValuesImpl<ColumnFixedString>();
}
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, Permutation & res) const override;
@ -175,13 +155,6 @@ public:
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
{
return scatterImpl<ColumnFixedString>(num_columns, selector);
}
void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void reserve(size_t size) override
@ -208,21 +181,6 @@ public:
return false;
}
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getRatioOfDefaultRowsImpl<ColumnFixedString>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnFixedString>();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
return getIndicesOfNonDefaultRowsImpl<ColumnFixedString>(indices, from, limit);
}
bool canBeInsideNullable() const override { return true; }
bool isFixedAndContiguous() const override { return true; }

View File

@ -19,10 +19,10 @@ using FunctionBasePtr = std::shared_ptr<const IFunctionBase>;
/** A column containing a lambda expression.
* Contains an expression and captured columns, but not input arguments.
*/
class ColumnFunction final : public COWHelper<IColumn, ColumnFunction>
class ColumnFunction final : public COWHelper<IColumnHelper<ColumnFunction>, ColumnFunction>
{
private:
friend class COWHelper<IColumn, ColumnFunction>;
friend class COWHelper<IColumnHelper<ColumnFunction>, ColumnFunction>;
ColumnFunction(
size_t size,
@ -102,7 +102,7 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName());
}
StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override
StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot serialize from {}", getName());
}

View File

@ -2,7 +2,6 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <DataTypes/NumberTraits.h>
#include <Common/HashTable/HashMap.h>
#include <Common/WeakHash.h>
@ -137,7 +136,7 @@ ColumnLowCardinality::ColumnLowCardinality(MutableColumnPtr && column_unique_, M
void ColumnLowCardinality::insert(const Field & x)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsert(x));
idx.insertPosition(getDictionary().uniqueInsert(x));
}
bool ColumnLowCardinality::tryInsert(const Field & x)
@ -175,14 +174,14 @@ void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n)
{
compactIfSharedDictionary();
const auto & nested = *low_cardinality_src->getDictionary().getNestedColumn();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(nested, position));
idx.insertPosition(getDictionary().uniqueInsertFrom(nested, position));
}
}
void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertFrom(src, n));
idx.insertPosition(getDictionary().uniqueInsertFrom(src, n));
}
void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length)
@ -209,7 +208,7 @@ void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, si
auto src_nested = low_cardinality_src->getDictionary().getNestedColumn();
auto used_keys = src_nested->index(*idx_map, 0);
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(*used_keys, 0, used_keys->size());
auto inserted_indexes = getDictionary().uniqueInsertRangeFrom(*used_keys, 0, used_keys->size());
idx.insertPositionsRange(*inserted_indexes->index(*sub_idx, 0), 0, length);
}
}
@ -217,7 +216,7 @@ void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, si
void ColumnLowCardinality::insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length)
{
compactIfSharedDictionary();
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(src, start, length);
auto inserted_indexes = getDictionary().uniqueInsertRangeFrom(src, start, length);
idx.insertPositionsRange(*inserted_indexes, 0, length);
}
@ -257,27 +256,50 @@ void ColumnLowCardinality::insertRangeFromDictionaryEncodedColumn(const IColumn
{
checkPositionsAreLimited(positions, keys.size());
compactIfSharedDictionary();
auto inserted_indexes = dictionary.getColumnUnique().uniqueInsertRangeFrom(keys, 0, keys.size());
auto inserted_indexes = getDictionary().uniqueInsertRangeFrom(keys, 0, keys.size());
idx.insertPositionsRange(*inserted_indexes->index(positions, 0), 0, positions.size());
}
void ColumnLowCardinality::insertData(const char * pos, size_t length)
{
compactIfSharedDictionary();
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertData(pos, length));
idx.insertPosition(getDictionary().uniqueInsertData(pos, length));
}
StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
return getDictionary().serializeValueIntoArena(getIndexes().getUInt(n), arena, begin);
}
char * ColumnLowCardinality::serializeValueIntoMemory(size_t n, char * memory) const
{
return getDictionary().serializeValueIntoMemory(getIndexes().getUInt(n), memory);
}
void ColumnLowCardinality::collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const
{
/// nullable is handled internally.
chassert(is_null == nullptr);
if (empty())
return;
size_t rows = size();
if (sizes.empty())
sizes.resize_fill(rows);
else if (sizes.size() != rows)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of sizes: {} doesn't match rows_num: {}. It is a bug", sizes.size(), rows);
PaddedPODArray<UInt64> dict_sizes;
getDictionary().collectSerializedValueSizes(dict_sizes, nullptr);
idx.collectSerializedValueSizes(sizes, dict_sizes);
}
const char * ColumnLowCardinality::deserializeAndInsertFromArena(const char * pos)
{
compactIfSharedDictionary();
const char * new_pos;
idx.insertPosition(dictionary.getColumnUnique().uniqueDeserializeAndInsertFromArena(pos, new_pos));
idx.insertPosition(getDictionary().uniqueDeserializeAndInsertFromArena(pos, new_pos));
return new_pos;
}
@ -308,11 +330,6 @@ void ColumnLowCardinality::updateHashFast(SipHash & hash) const
getDictionary().getNestedColumn()->updateHashFast(hash);
}
void ColumnLowCardinality::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
MutableColumnPtr ColumnLowCardinality::cloneResized(size_t size) const
{
auto unique_ptr = dictionary.getColumnUniquePtr();
@ -354,15 +371,6 @@ int ColumnLowCardinality::compareAtWithCollation(size_t n, size_t m, const IColu
return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
}
void ColumnLowCardinality::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnLowCardinality>(
assert_cast<const ColumnLowCardinality &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
bool ColumnLowCardinality::hasEqualValues() const
{
if (getDictionary().size() <= 1)
@ -502,7 +510,7 @@ void ColumnLowCardinality::setSharedDictionary(const ColumnPtr & column_unique)
ColumnLowCardinality::MutablePtr ColumnLowCardinality::cutAndCompact(size_t start, size_t length) const
{
auto sub_positions = IColumn::mutate(idx.getPositions()->cut(start, length));
auto new_column_unique = Dictionary::compact(dictionary.getColumnUnique(), sub_positions);
auto new_column_unique = Dictionary::compact(getDictionary(), sub_positions);
return ColumnLowCardinality::create(std::move(new_column_unique), std::move(sub_positions));
}
@ -812,6 +820,20 @@ void ColumnLowCardinality::Index::updateWeakHash(WeakHash32 & hash, WeakHash32 &
callForType(std::move(update_weak_hash), size_of_type);
}
void ColumnLowCardinality::Index::collectSerializedValueSizes(
PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const
{
auto func = [&](auto x)
{
using CurIndexType = decltype(x);
auto & data = getPositionsData<CurIndexType>();
size_t rows = sizes.size();
for (size_t i = 0; i < rows; ++i)
sizes[i] += dict_sizes[data[i]];
};
callForType(std::move(func), size_of_type);
}
ColumnLowCardinality::Dictionary::Dictionary(MutableColumnPtr && column_unique_, bool is_shared)
: column_unique(std::move(column_unique_)), shared(is_shared)

View File

@ -1,9 +1,10 @@
#pragma once
#include <Columns/ColumnsNumber.h>
#include <Columns/IColumn.h>
#include <Columns/IColumnUnique.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include "ColumnsNumber.h"
#include <Common/typeid_cast.h>
namespace DB
@ -23,9 +24,9 @@ namespace ErrorCodes
*
* @note The indices column always contains the default value (empty StringRef) with the first index.
*/
class ColumnLowCardinality final : public COWHelper<IColumn, ColumnLowCardinality>
class ColumnLowCardinality final : public COWHelper<IColumnHelper<ColumnLowCardinality>, ColumnLowCardinality>
{
friend class COWHelper<IColumn, ColumnLowCardinality>;
friend class COWHelper<IColumnHelper<ColumnLowCardinality>, ColumnLowCardinality>;
ColumnLowCardinality(MutableColumnPtr && column_unique, MutableColumnPtr && indexes, bool is_shared = false);
ColumnLowCardinality(const ColumnLowCardinality & other) = default;
@ -34,7 +35,7 @@ public:
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWHelper<IColumn, ColumnLowCardinality>;
using Base = COWHelper<IColumnHelper<ColumnLowCardinality>, ColumnLowCardinality>;
static Ptr create(const ColumnPtr & column_unique_, const ColumnPtr & indexes_, bool is_shared = false)
{
return ColumnLowCardinality::create(column_unique_->assumeMutable(), indexes_->assumeMutable(), is_shared);
@ -88,7 +89,10 @@ public:
void popBack(size_t n) override { idx.popBack(n); }
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
@ -125,10 +129,6 @@ public:
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override;
bool hasEqualValues() const override;
@ -152,8 +152,6 @@ public:
std::vector<MutableColumnPtr> scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
void getExtremes(Field & min, Field & max) const override
{
return dictionary.getColumnUnique().getNestedColumn()->index(getIndexes(), 0)->getExtremes(min, max); /// TODO: optimize
@ -315,6 +313,8 @@ public:
void updateWeakHash(WeakHash32 & hash, WeakHash32 & dict_hash) const;
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const PaddedPODArray<UInt64> & dict_sizes) const;
private:
WrappedPtr positions;
size_t size_of_type = 0;

View File

@ -1,7 +1,5 @@
#include <Columns/ColumnMap.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/IColumnImpl.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Common/typeid_cast.h>
@ -120,11 +118,16 @@ void ColumnMap::popBack(size_t n)
nested->popBack(n);
}
StringRef ColumnMap::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnMap::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
return nested->serializeValueIntoArena(n, arena, begin);
}
char * ColumnMap::serializeValueIntoMemory(size_t n, char * memory) const
{
return nested->serializeValueIntoMemory(n, memory);
}
const char * ColumnMap::deserializeAndInsertFromArena(const char * pos)
{
return nested->deserializeAndInsertFromArena(pos);
@ -208,19 +211,6 @@ int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direct
return nested->compareAt(n, m, rhs_map.getNestedColumn(), nan_direction_hint);
}
void ColumnMap::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnMap>(assert_cast<const ColumnMap &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
bool ColumnMap::hasEqualValues() const
{
return hasEqualValuesImpl<ColumnMap>();
}
void ColumnMap::getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const
{
@ -233,11 +223,6 @@ void ColumnMap::updatePermutation(IColumn::PermutationSortDirection direction, I
nested->updatePermutation(direction, stability, limit, nan_direction_hint, res, equal_ranges);
}
void ColumnMap::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
void ColumnMap::reserve(size_t n)
{
nested->reserve(n);
@ -310,21 +295,6 @@ bool ColumnMap::structureEquals(const IColumn & rhs) const
return false;
}
double ColumnMap::getRatioOfDefaultRows(double sample_ratio) const
{
return getRatioOfDefaultRowsImpl<ColumnMap>(sample_ratio);
}
UInt64 ColumnMap::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnMap>();
}
void ColumnMap::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnMap>(indices, from, limit);
}
ColumnPtr ColumnMap::compress() const
{
auto compressed = nested->compress();

View File

@ -10,10 +10,10 @@ namespace DB
/** Column, that stores a nested Array(Tuple(key, value)) column.
*/
class ColumnMap final : public COWHelper<IColumn, ColumnMap>
class ColumnMap final : public COWHelper<IColumnHelper<ColumnMap>, ColumnMap>
{
private:
friend class COWHelper<IColumn, ColumnMap>;
friend class COWHelper<IColumnHelper<ColumnMap>, ColumnMap>;
WrappedPtr nested;
@ -25,7 +25,7 @@ public:
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWHelper<IColumn, ColumnMap>;
using Base = COWHelper<IColumnHelper<ColumnMap>, ColumnMap>;
static Ptr create(const ColumnPtr & keys, const ColumnPtr & values, const ColumnPtr & offsets)
{
@ -59,7 +59,8 @@ public:
bool tryInsert(const Field & x) override;
void insertDefault() override;
void popBack(size_t n) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
@ -73,12 +74,7 @@ public:
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
bool hasEqualValues() const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
@ -94,9 +90,6 @@ public:
void forEachSubcolumn(MutableColumnCallback callback) override;
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
void finalize() override { nested->finalize(); }
bool isFinalized() const override { return nested->isFinalized(); }

View File

@ -2,16 +2,11 @@
#include <Common/SipHash.h>
#include <Common/assert_cast.h>
#include <Common/WeakHash.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnsDateTime.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/ColumnLowCardinality.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#if USE_EMBEDDED_COMPILER
#include <DataTypes/Native.h>
@ -35,7 +30,6 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn
{
/// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
nested_column = getNestedColumn().convertToFullColumnIfConst();
nested_type = nested_column->getDataType();
if (!getNestedColumn().canBeInsideNullable())
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} cannot be inside Nullable column", getNestedColumn().getName());
@ -136,77 +130,35 @@ void ColumnNullable::insertData(const char * pos, size_t length)
}
}
StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
const auto & arr = getNullMapData();
static constexpr auto s = sizeof(arr[0]);
char * pos;
switch (nested_type)
{
case TypeIndex::UInt8:
return static_cast<const ColumnUInt8 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::UInt16:
return static_cast<const ColumnUInt16 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::UInt32:
return static_cast<const ColumnUInt32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::UInt64:
return static_cast<const ColumnUInt64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::UInt128:
return static_cast<const ColumnUInt128 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::UInt256:
return static_cast<const ColumnUInt256 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Int8:
return static_cast<const ColumnInt8 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Int16:
return static_cast<const ColumnInt16 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Int32:
return static_cast<const ColumnInt32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Int64:
return static_cast<const ColumnInt64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Int128:
return static_cast<const ColumnInt128 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Int256:
return static_cast<const ColumnInt256 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Float32:
return static_cast<const ColumnFloat32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Float64:
return static_cast<const ColumnFloat64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Date:
return static_cast<const ColumnDate *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Date32:
return static_cast<const ColumnDate32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::DateTime:
return static_cast<const ColumnDateTime *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::DateTime64:
return static_cast<const ColumnDateTime64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::String:
return static_cast<const ColumnString *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::FixedString:
return static_cast<const ColumnFixedString *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Decimal32:
return static_cast<const ColumnDecimal<Decimal32> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Decimal64:
return static_cast<const ColumnDecimal<Decimal64> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Decimal128:
return static_cast<const ColumnDecimal<Decimal128> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::Decimal256:
return static_cast<const ColumnDecimal<Decimal256> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::UUID:
return static_cast<const ColumnUUID *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::IPv4:
return static_cast<const ColumnIPv4 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
case TypeIndex::IPv6:
return static_cast<const ColumnIPv6 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
default:
pos = arena.allocContinue(s, begin);
memcpy(pos, &arr[n], s);
if (arr[n])
return StringRef(pos, s);
auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
/// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
return StringRef(nested_ref.data - s, nested_ref.size + s);
}
auto * pos = arena.allocContinue(s, begin);
memcpy(pos, &arr[n], s);
if (arr[n])
return StringRef(pos, s);
auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
/// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
return StringRef(nested_ref.data - s, nested_ref.size + s);
}
char * ColumnNullable::serializeValueIntoMemory(size_t n, char * memory) const
{
const auto & arr = getNullMapData();
static constexpr auto s = sizeof(arr[0]);
memcpy(memory, &arr[n], s);
++memory;
if (arr[n])
return memory;
return getNestedColumn().serializeValueIntoMemory(n, memory);
}
const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos)
@ -418,19 +370,6 @@ int ColumnNullable::compareAtWithCollation(size_t n, size_t m, const IColumn & r
return compareAtImpl(n, m, rhs_, null_direction_hint, &collator);
}
void ColumnNullable::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnNullable>(assert_cast<const ColumnNullable &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
bool ColumnNullable::hasEqualValues() const
{
return hasEqualValuesImpl<ColumnNullable>();
}
void ColumnNullable::getPermutationImpl(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int null_direction_hint, Permutation & res, const Collator * collator) const
{
@ -680,11 +619,6 @@ void ColumnNullable::updatePermutationWithCollation(const Collator & collator, I
updatePermutationImpl(direction, stability, limit, null_direction_hint, res, equal_ranges, &collator);
}
void ColumnNullable::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
void ColumnNullable::reserve(size_t n)
{
getNestedColumn().reserve(n);

View File

@ -1,12 +1,10 @@
#pragma once
#include <Columns/IColumn.h>
#include <Columns/IColumnImpl.h>
#include <Columns/ColumnsNumber.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include "Core/TypeId.h"
#include "config.h"
@ -27,10 +25,10 @@ using ConstNullMapPtr = const NullMap *;
/// over a bitmap because columns are usually stored on disk as compressed
/// files. In this regard, using a bitmap instead of a byte map would
/// greatly complicate the implementation with little to no benefits.
class ColumnNullable final : public COWHelper<IColumn, ColumnNullable>
class ColumnNullable final : public COWHelper<IColumnHelper<ColumnNullable>, ColumnNullable>
{
private:
friend class COWHelper<IColumn, ColumnNullable>;
friend class COWHelper<IColumnHelper<ColumnNullable>, ColumnNullable>;
ColumnNullable(MutableColumnPtr && nested_column_, MutableColumnPtr && null_map_);
ColumnNullable(const ColumnNullable &) = default;
@ -39,7 +37,7 @@ public:
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWHelper<IColumn, ColumnNullable>;
using Base = COWHelper<IColumnHelper<ColumnNullable>, ColumnNullable>;
static Ptr create(const ColumnPtr & nested_column_, const ColumnPtr & null_map_)
{
return ColumnNullable::create(nested_column_->assumeMutable(), null_map_->assumeMutable());
@ -63,7 +61,8 @@ public:
StringRef getDataAt(size_t) const override;
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -96,11 +95,7 @@ public:
#endif
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int null_direction_hint, const Collator &) const override;
bool hasEqualValues() const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int null_direction_hint, Permutation & res) const override;
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
@ -124,13 +119,6 @@ public:
// Special function for nullable minmax index
void getExtremesNullLast(Field & min, Field & max) const;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
{
return scatterImpl<ColumnNullable>(num_columns, selector);
}
void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void forEachSubcolumn(MutableColumnCallback callback) override
@ -154,21 +142,6 @@ public:
return false;
}
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getRatioOfDefaultRowsImpl<ColumnNullable>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnNullable>();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
getIndicesOfNonDefaultRowsImpl<ColumnNullable>(indices, from, limit);
}
ColumnPtr createWithOffsets(const Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const override;
bool isNullable() const override { return true; }
@ -215,8 +188,6 @@ public:
private:
WrappedPtr nested_column;
WrappedPtr null_map;
// optimize serializeValueIntoArena
TypeIndex nested_type;
template <bool negative>
void applyNullMapImpl(const NullMap & map);

View File

@ -12,7 +12,6 @@
#include <Interpreters/castColumn.h>
#include <Interpreters/convertFieldToType.h>
#include <Common/HashTable/HashSet.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <numeric>
@ -852,14 +851,6 @@ void ColumnObject::getPermutation(PermutationSortDirection, PermutationSortStabi
iota(res.data(), res.size(), size_t(0));
}
void ColumnObject::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnObject>(assert_cast<const ColumnObject &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
void ColumnObject::getExtremes(Field & min, Field & max) const
{
if (num_rows == 0)
@ -874,16 +865,6 @@ void ColumnObject::getExtremes(Field & min, Field & max) const
}
}
MutableColumns ColumnObject::scatter(ColumnIndex num_columns, const Selector & selector) const
{
return scatterImpl<ColumnObject>(num_columns, selector);
}
void ColumnObject::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
const ColumnObject::Subcolumn & ColumnObject::getSubcolumn(const PathInData & key) const
{
if (const auto * node = subcolumns.findLeaf(key))

View File

@ -48,7 +48,7 @@ FieldInfo getFieldInfo(const Field & field);
* a trie-like structure. ColumnObject is not suitable for writing into tables
* and it should be converted to Tuple with fixed set of subcolumns before that.
*/
class ColumnObject final : public COWHelper<IColumn, ColumnObject>
class ColumnObject final : public COWHelper<IColumnHelper<ColumnObject>, ColumnObject>
{
public:
/** Class that represents one subcolumn.
@ -229,23 +229,17 @@ public:
/// Order of rows in ColumnObject is undefined.
void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {}
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
void getExtremes(Field & min, Field & max) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer) override;
/// All other methods throw exception.
StringRef getDataAt(size_t) const override { throwMustBeConcrete(); }
bool isDefaultAt(size_t) const override { throwMustBeConcrete(); }
void insertData(const char *, size_t) override { throwMustBeConcrete(); }
StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override { throwMustBeConcrete(); }
StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeConcrete(); }
char * serializeValueIntoMemory(size_t, char *) const override { throwMustBeConcrete(); }
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }

View File

@ -1,13 +1,14 @@
#include <Columns/ColumnCompressed.h>
#include <Columns/ColumnSparse.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnsCommon.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <Columns/ColumnTuple.h>
#include <Common/HashTable/Hash.h>
#include <Common/SipHash.h>
#include <Common/WeakHash.h>
#include <Common/iota.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <algorithm>
#include <bit>
@ -152,11 +153,16 @@ void ColumnSparse::insertData(const char * pos, size_t length)
insertSingleValue([&](IColumn & column) { column.insertData(pos, length); });
}
StringRef ColumnSparse::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnSparse::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
return values->serializeValueIntoArena(getValueIndex(n), arena, begin);
}
char * ColumnSparse::serializeValueIntoMemory(size_t n, char * memory) const
{
return values->serializeValueIntoMemory(getValueIndex(n), memory);
}
const char * ColumnSparse::deserializeAndInsertFromArena(const char * pos)
{
const char * res = nullptr;
@ -730,16 +736,6 @@ UInt64 ColumnSparse::getNumberOfDefaultRows() const
return _size - offsets->size();
}
MutableColumns ColumnSparse::scatter(ColumnIndex num_columns, const Selector & selector) const
{
return scatterImpl<ColumnSparse>(num_columns, selector);
}
void ColumnSparse::gather(ColumnGathererStream & gatherer_stream)
{
gatherer_stream.gather(*this);
}
ColumnPtr ColumnSparse::compress() const
{
auto values_compressed = values->compress();

View File

@ -1,7 +1,6 @@
#pragma once
#include <Columns/IColumn.h>
#include <Columns/IColumnImpl.h>
#include <Columns/ColumnsNumber.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
@ -18,10 +17,10 @@ namespace DB
* values contains also one default value at 0 position to make
* implementation of execution of functions and sorting more convenient.
*/
class ColumnSparse final : public COWHelper<IColumn, ColumnSparse>
class ColumnSparse final : public COWHelper<IColumnHelper<ColumnSparse>, ColumnSparse>
{
private:
friend class COWHelper<IColumn, ColumnSparse>;
friend class COWHelper<IColumnHelper<ColumnSparse>, ColumnSparse>;
explicit ColumnSparse(MutableColumnPtr && values_);
ColumnSparse(MutableColumnPtr && values_, MutableColumnPtr && offsets_, size_t size_);
@ -31,7 +30,7 @@ public:
static constexpr auto DEFAULT_ROWS_SEARCH_SAMPLE_RATIO = 0.1;
static constexpr auto DEFAULT_RATIO_FOR_SPARSE_SERIALIZATION = 0.95;
using Base = COWHelper<IColumn, ColumnSparse>;
using Base = COWHelper<IColumnHelper<ColumnSparse>, ColumnSparse>;
static Ptr create(const ColumnPtr & values_, const ColumnPtr & offsets_, size_t size_)
{
return Base::create(values_->assumeMutable(), offsets_->assumeMutable(), size_);
@ -78,7 +77,8 @@ public:
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char *) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
@ -135,10 +135,6 @@ public:
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void forEachSubcolumn(MutableColumnCallback callback) override;

View File

@ -4,7 +4,6 @@
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/MaskOperations.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <Common/Arena.h>
#include <Common/HashTable/Hash.h>
#include <Common/WeakHash.h>
@ -27,7 +26,7 @@ namespace ErrorCodes
ColumnString::ColumnString(const ColumnString & src)
: COWHelper<IColumn, ColumnString>(src),
: COWHelper<IColumnHelper<ColumnString>, ColumnString>(src),
offsets(src.offsets.begin(), src.offsets.end()),
chars(src.chars.begin(), src.chars.end())
{
@ -213,34 +212,69 @@ ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const
}
StringRef ColumnString::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const
void ColumnString::collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const
{
size_t string_size = sizeAt(n);
size_t offset = offsetAt(n);
constexpr size_t null_bit_size = sizeof(UInt8);
StringRef res;
char * pos;
if (null_bit)
if (empty())
return;
size_t rows = size();
if (sizes.empty())
sizes.resize_fill(rows);
else if (sizes.size() != rows)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of sizes: {} doesn't match rows_num: {}. It is a bug", sizes.size(), rows);
if (is_null)
{
res.size = * null_bit ? null_bit_size : null_bit_size + sizeof(string_size) + string_size;
pos = arena.allocContinue(res.size, begin);
res.data = pos;
memcpy(pos, null_bit, null_bit_size);
if (*null_bit) return res;
pos += null_bit_size;
for (size_t i = 0; i < rows; ++i)
{
if (is_null[i])
{
++sizes[i];
}
else
{
size_t string_size = sizeAt(i);
sizes[i] += sizeof(string_size) + string_size + 1 /* null byte */;
}
}
}
else
{
res.size = sizeof(string_size) + string_size;
pos = arena.allocContinue(res.size, begin);
res.data = pos;
for (size_t i = 0; i < rows; ++i)
{
size_t string_size = sizeAt(i);
sizes[i] += sizeof(string_size) + string_size;
}
}
}
StringRef ColumnString::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
size_t string_size = sizeAt(n);
size_t offset = offsetAt(n);
StringRef res;
res.size = sizeof(string_size) + string_size;
char * pos = arena.allocContinue(res.size, begin);
memcpy(pos, &string_size, sizeof(string_size));
memcpy(pos + sizeof(string_size), &chars[offset], string_size);
res.data = pos;
return res;
}
char * ColumnString::serializeValueIntoMemory(size_t n, char * memory) const
{
size_t string_size = sizeAt(n);
size_t offset = offsetAt(n);
memcpy(memory, &string_size, sizeof(string_size));
memory += sizeof(string_size);
memcpy(memory, &chars[offset], string_size);
return memory + string_size;
}
const char * ColumnString::deserializeAndInsertFromArena(const char * pos)
{
const size_t string_size = unalignedLoad<size_t>(pos);
@ -303,20 +337,6 @@ ColumnPtr ColumnString::indexImpl(const PaddedPODArray<Type> & indexes, size_t l
return res;
}
void ColumnString::compareColumn(
const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnString>(assert_cast<const ColumnString &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
bool ColumnString::hasEqualValues() const
{
return hasEqualValuesImpl<ColumnString>();
}
struct ColumnString::ComparatorBase
{
const ColumnString & parent;
@ -482,13 +502,6 @@ ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
return res;
}
void ColumnString::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
void ColumnString::reserve(size_t n)
{
offsets.reserve_exact(n);

View File

@ -23,14 +23,14 @@ class Arena;
/** Column for String values.
*/
class ColumnString final : public COWHelper<IColumn, ColumnString>
class ColumnString final : public COWHelper<IColumnHelper<ColumnString>, ColumnString>
{
public:
using Char = UInt8;
using Chars = PaddedPODArray<UInt8>;
private:
friend class COWHelper<IColumn, ColumnString>;
friend class COWHelper<IColumnHelper<ColumnString>, ColumnString>;
/// Maps i'th position to offset to i+1'th element. Last offset maps to the end of all chars (is the size of all chars).
Offsets offsets;
@ -179,7 +179,10 @@ public:
offsets.resize_assume_reserved(offsets.size() - n);
}
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
@ -234,12 +237,6 @@ public:
return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1);
}
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
bool hasEqualValues() const override;
/// Variant of compareAt for string comparison with respect of collation.
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const override;
@ -258,13 +255,6 @@ public:
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override
{
return scatterImpl<ColumnString>(num_columns, selector);
}
void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void reserve(size_t n) override;
@ -272,7 +262,6 @@ public:
void getExtremes(Field & min, Field & max) const override;
bool canBeInsideNullable() const override { return true; }
bool structureEquals(const IColumn & rhs) const override
@ -280,21 +269,6 @@ public:
return typeid(rhs) == typeid(ColumnString);
}
double getRatioOfDefaultRows(double sample_ratio) const override
{
return getRatioOfDefaultRowsImpl<ColumnString>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return getNumberOfDefaultRowsImpl<ColumnString>();
}
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override
{
return getIndicesOfNonDefaultRowsImpl<ColumnString>(indices, from, limit);
}
Chars & getChars() { return chars; }
const Chars & getChars() const { return chars; }

View File

@ -3,15 +3,15 @@
#include <Columns/ColumnCompressed.h>
#include <Columns/IColumnImpl.h>
#include <Core/Field.h>
#include <Common/WeakHash.h>
#include <Common/assert_cast.h>
#include <Common/iota.h>
#include <Common/typeid_cast.h>
#include <DataTypes/Serializations/SerializationInfoTuple.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <base/sort.h>
#include <Common/WeakHash.h>
#include <Common/assert_cast.h>
#include <Common/iota.h>
#include <Common/typeid_cast.h>
namespace DB
@ -197,7 +197,7 @@ void ColumnTuple::popBack(size_t n)
column->popBack(n);
}
StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
StringRef res(begin, 0);
for (const auto & column : columns)
@ -210,6 +210,14 @@ StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char con
return res;
}
char * ColumnTuple::serializeValueIntoMemory(size_t n, char * memory) const
{
for (const auto & column : columns)
memory = column->serializeValueIntoMemory(n, memory);
return memory;
}
const char * ColumnTuple::deserializeAndInsertFromArena(const char * pos)
{
for (auto & column : columns)
@ -351,24 +359,11 @@ int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_dire
return compareAtImpl(n, m, rhs, nan_direction_hint);
}
void ColumnTuple::compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnTuple>(assert_cast<const ColumnTuple &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
int ColumnTuple::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const
{
return compareAtImpl(n, m, rhs, nan_direction_hint, &collator);
}
bool ColumnTuple::hasEqualValues() const
{
return hasEqualValuesImpl<ColumnTuple>();
}
template <bool positive>
struct ColumnTuple::Less
{
@ -457,11 +452,6 @@ void ColumnTuple::updatePermutationWithCollation(const Collator & collator, ICol
updatePermutationImpl(direction, stability, limit, nan_direction_hint, res, equal_ranges, &collator);
}
void ColumnTuple::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
void ColumnTuple::reserve(size_t n)
{
const size_t tuple_size = columns.size();
@ -592,21 +582,6 @@ ColumnPtr ColumnTuple::compress() const
});
}
double ColumnTuple::getRatioOfDefaultRows(double sample_ratio) const
{
return getRatioOfDefaultRowsImpl<ColumnTuple>(sample_ratio);
}
UInt64 ColumnTuple::getNumberOfDefaultRows() const
{
return getNumberOfDefaultRowsImpl<ColumnTuple>();
}
void ColumnTuple::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnTuple>(indices, from, limit);
}
void ColumnTuple::finalize()
{
for (auto & column : columns)

View File

@ -12,10 +12,10 @@ namespace DB
* Mixed constant/non-constant columns is prohibited in tuple
* for implementation simplicity.
*/
class ColumnTuple final : public COWHelper<IColumn, ColumnTuple>
class ColumnTuple final : public COWHelper<IColumnHelper<ColumnTuple>, ColumnTuple>
{
private:
friend class COWHelper<IColumn, ColumnTuple>;
friend class COWHelper<IColumnHelper<ColumnTuple>, ColumnTuple>;
using TupleColumns = std::vector<WrappedPtr>;
TupleColumns columns;
@ -30,7 +30,7 @@ public:
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
*/
using Base = COWHelper<IColumn, ColumnTuple>;
using Base = COWHelper<IColumnHelper<ColumnTuple>, ColumnTuple>;
static Ptr create(const Columns & columns);
static Ptr create(const TupleColumns & columns);
static Ptr create(Columns && arg) { return create(arg); }
@ -62,7 +62,8 @@ public:
void insertFrom(const IColumn & src_, size_t n) override;
void insertDefault() override;
void popBack(size_t n) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
@ -75,13 +76,8 @@ public:
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override;
bool hasEqualValues() const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
@ -103,9 +99,6 @@ public:
bool structureEquals(const IColumn & rhs) const override;
bool isCollationSupported() const override;
ColumnPtr compress() const override;
double getRatioOfDefaultRows(double sample_ratio) const override;
UInt64 getNumberOfDefaultRows() const override;
void getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const override;
void finalize() override;
bool isFinalized() const override;

View File

@ -1,7 +1,6 @@
#pragma once
#include <Columns/IColumnUnique.h>
#include <Columns/IColumnImpl.h>
#include <Columns/ReverseIndex.h>
#include <Columns/ColumnVector.h>
@ -80,7 +79,9 @@ public:
Float32 getFloat32(size_t n) const override { return getNestedColumn()->getFloat32(n); }
bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); }
bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); }
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash_func) const override
{
@ -394,7 +395,21 @@ size_t ColumnUnique<ColumnType>::uniqueInsertData(const char * pos, size_t lengt
}
template <typename ColumnType>
StringRef ColumnUnique<ColumnType>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
void ColumnUnique<ColumnType>::collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const
{
/// nullable is handled internally.
chassert(is_null == nullptr);
if (IColumn::empty())
return;
if (is_nullable)
column_holder->collectSerializedValueSizes(sizes, assert_cast<const ColumnUInt8 &>(*nested_null_mask).getData().data());
else
column_holder->collectSerializedValueSizes(sizes, nullptr);
}
template <typename ColumnType>
StringRef ColumnUnique<ColumnType>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
if (is_nullable)
{
@ -417,6 +432,22 @@ StringRef ColumnUnique<ColumnType>::serializeValueIntoArena(size_t n, Arena & ar
return column_holder->serializeValueIntoArena(n, arena, begin);
}
template <typename ColumnType>
char * ColumnUnique<ColumnType>::serializeValueIntoMemory(size_t n, char * memory) const
{
if (is_nullable)
{
UInt8 flag = (n == getNullValueIndex() ? 1 : 0);
unalignedStore<UInt8>(memory, flag);
++memory;
if (n == getNullValueIndex())
return memory;
}
return column_holder->serializeValueIntoMemory(n, memory);
}
template <typename ColumnType>
size_t ColumnUnique<ColumnType>::uniqueDeserializeAndInsertFromArena(const char * pos, const char *& new_pos)
{

View File

@ -643,7 +643,7 @@ void ColumnVariant::popBack(size_t n)
offsets->popBack(n);
}
StringRef ColumnVariant::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
StringRef ColumnVariant::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
/// During any serialization/deserialization we should always use global discriminators.
Discriminator global_discr = globalDiscriminatorAt(n);
@ -1085,11 +1085,6 @@ MutableColumns ColumnVariant::scatter(ColumnIndex num_columns, const Selector &
return result;
}
void ColumnVariant::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
bool ColumnVariant::hasEqualValues() const
{
if (local_discriminators->empty() || hasOnlyNulls())
@ -1120,15 +1115,6 @@ int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_di
return getVariantByGlobalDiscriminator(left_discr).compareAt(offsetAt(n), rhs_variant.offsetAt(m), rhs_variant.getVariantByGlobalDiscriminator(right_discr), nan_direction_hint);
}
void ColumnVariant::compareColumn(
const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const
{
return doCompareColumn<ColumnVariant>(assert_cast<const ColumnVariant &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
struct ColumnVariant::ComparatorBase
{
const ColumnVariant & parent;
@ -1304,7 +1290,14 @@ UInt64 ColumnVariant::getNumberOfDefaultRows() const
void ColumnVariant::getIndicesOfNonDefaultRows(Offsets & indices, size_t from, size_t limit) const
{
return getIndicesOfNonDefaultRowsImpl<ColumnVariant>(indices, from, limit);
size_t to = limit && from + limit < size() ? from + limit : size();
indices.reserve(indices.size() + to - from);
for (size_t i = from; i < to; ++i)
{
if (!isDefaultAt(i))
indices.push_back(i);
}
}
void ColumnVariant::finalize()

View File

@ -54,7 +54,7 @@ namespace DB
* 1 2
*
*/
class ColumnVariant final : public COWHelper<IColumn, ColumnVariant>
class ColumnVariant final : public COWHelper<IColumnHelper<ColumnVariant>, ColumnVariant>
{
public:
using Discriminator = UInt8;
@ -74,7 +74,7 @@ public:
using ComparatorEqual = ComparatorEqualImpl<ComparatorBase>;
private:
friend class COWHelper<IColumn, ColumnVariant>;
friend class COWHelper<IColumnHelper<ColumnVariant>, ColumnVariant>;
using NestedColumns = std::vector<WrappedPtr>;
@ -103,7 +103,7 @@ public:
/** Create immutable column using immutable arguments. This arguments may be shared with other variants.
* Use IColumn::mutate in order to make mutable column and mutate shared nested variants.
*/
using Base = COWHelper<IColumn, ColumnVariant>;
using Base = COWHelper<IColumnHelper<ColumnVariant>, ColumnVariant>;
static Ptr create(const Columns & variants_) { return create(variants_, {}); }
static Ptr create(const Columns & variants_, const std::vector<Discriminator> & local_to_global_discriminators_);
static Ptr create(const ColumnPtr & local_discriminators_, const Columns & variants_) { return create(local_discriminators_, variants_, {}); }
@ -185,7 +185,7 @@ public:
void insertDefault() override;
void insertManyDefaults(size_t length) override;
void popBack(size_t n) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
@ -199,13 +199,7 @@ public:
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
void gather(ColumnGathererStream & gatherer_stream) override;
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;
bool hasEqualValues() const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,

View File

@ -52,31 +52,6 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
template <typename T>
StringRef ColumnVector<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
constexpr size_t null_bit_size = sizeof(UInt8);
StringRef res;
char * pos;
if (null_bit)
{
res.size = * null_bit ? null_bit_size : null_bit_size + sizeof(T);
pos = arena.allocContinue(res.size, begin);
res.data = pos;
memcpy(pos, null_bit, null_bit_size);
if (*null_bit) return res;
pos += null_bit_size;
}
else
{
res.size = sizeof(T);
pos = arena.allocContinue(res.size, begin);
res.data = pos;
}
unalignedStore<T>(pos, data[n]);
return res;
}
template <typename T>
const char * ColumnVector<T>::deserializeAndInsertFromArena(const char * pos)
{
@ -883,12 +858,6 @@ ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
return res;
}
template <typename T>
void ColumnVector<T>::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
}
template <typename T>
void ColumnVector<T>::getExtremes(Field & min, Field & max) const
{

View File

@ -1,16 +1,15 @@
#pragma once
#include <cmath>
#include <Columns/ColumnVectorHelper.h>
#include <Columns/ColumnFixedSizeHelper.h>
#include <Columns/IColumn.h>
#include <Columns/IColumnImpl.h>
#include <Common/TargetSpecific.h>
#include <Common/assert_cast.h>
#include <Core/CompareHelper.h>
#include <Core/Field.h>
#include <Core/TypeId.h>
#include <base/TypeName.h>
#include <base/unaligned.h>
#include <Common/TargetSpecific.h>
#include <Common/assert_cast.h>
#include "config.h"
@ -30,13 +29,13 @@ namespace ErrorCodes
/** A template for columns that use a simple array to store.
*/
template <typename T>
class ColumnVector final : public COWHelper<ColumnVectorHelper, ColumnVector<T>>
class ColumnVector final : public COWHelper<IColumnHelper<ColumnVector<T>, ColumnFixedSizeHelper>, ColumnVector<T>>
{
static_assert(!is_decimal<T>);
private:
using Self = ColumnVector;
friend class COWHelper<ColumnVectorHelper, Self>;
friend class COWHelper<IColumnHelper<Self, ColumnFixedSizeHelper>, Self>;
struct less;
struct less_stable;
@ -101,8 +100,6 @@ public:
data.resize_assume_reserved(data.size() - n);
}
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
@ -158,19 +155,6 @@ public:
#endif
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override
{
return this->template doCompareColumn<Self>(assert_cast<const Self &>(rhs), rhs_row_num, row_indexes,
compare_results, direction, nan_direction_hint);
}
bool hasEqualValues() const override
{
return this->template hasEqualValuesImpl<Self>();
}
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
@ -265,13 +249,6 @@ public:
void getExtremes(Field & min, Field & max) const override;
MutableColumns scatter(IColumn::ColumnIndex num_columns, const IColumn::Selector & selector) const override
{
return this->template scatterImpl<Self>(num_columns, selector);
}
void gather(ColumnGathererStream & gatherer_stream) override;
bool canBeInsideNullable() const override { return true; }
bool isFixedAndContiguous() const override { return true; }
size_t sizeOfValueIfFixed() const override { return sizeof(T); }
@ -293,21 +270,6 @@ public:
return typeid(rhs) == typeid(ColumnVector<T>);
}
double getRatioOfDefaultRows(double sample_ratio) const override
{
return this->template getRatioOfDefaultRowsImpl<Self>(sample_ratio);
}
UInt64 getNumberOfDefaultRows() const override
{
return this->template getNumberOfDefaultRowsImpl<Self>();
}
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override
{
return this->template getIndicesOfNonDefaultRowsImpl<Self>(indices, from, limit);
}
ColumnPtr createWithOffsets(const IColumn::Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const override;
ColumnPtr compress() const override;

View File

@ -1,12 +1,27 @@
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/IColumnDummy.h>
#include <Columns/ColumnAggregateFunction.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnFunction.h>
#include <Columns/ColumnLowCardinality.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnObject.h>
#include <Columns/ColumnSparse.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnVariant.h>
#include <Columns/ColumnVector.h>
#include <Core/Field.h>
#include <DataTypes/Serializations/SerializationInfo.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
namespace DB
{
@ -38,8 +53,12 @@ void IColumn::insertFrom(const IColumn & src, size_t n)
ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const ColumnConst & column_with_default_value, size_t total_rows, size_t shift) const
{
if (offsets.size() + shift != size())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Incompatible sizes of offsets ({}), shift ({}) and size of column {}", offsets.size(), shift, size());
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Incompatible sizes of offsets ({}), shift ({}) and size of column {}",
offsets.size(),
shift,
size());
auto res = cloneEmpty();
res->reserve(total_rows);
@ -94,4 +113,355 @@ bool isColumnConst(const IColumn & column)
return checkColumn<ColumnConst>(column);
}
template <typename Derived, typename Parent>
MutableColumns IColumnHelper<Derived, Parent>::scatter(IColumn::ColumnIndex num_columns, const IColumn::Selector & selector) const
{
const auto & self = static_cast<const Derived &>(*this);
size_t num_rows = self.size();
if (num_rows != selector.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of selector: {} doesn't match size of column: {}", selector.size(), num_rows);
MutableColumns columns(num_columns);
for (auto & column : columns)
column = self.cloneEmpty();
{
size_t reserve_size = static_cast<size_t>(num_rows * 1.1 / num_columns); /// 1.1 is just a guess. Better to use n-sigma rule.
if (reserve_size > 1)
for (auto & column : columns)
column->reserve(reserve_size);
}
for (size_t i = 0; i < num_rows; ++i)
static_cast<Derived &>(*columns[selector[i]]).insertFrom(*this, i);
return columns;
}
template <typename Derived, typename Parent>
void IColumnHelper<Derived, Parent>::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(static_cast<Derived &>(*this));
}
template <typename Derived, bool reversed>
void compareImpl(
const Derived & lhs,
const Derived & rhs,
size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes [[maybe_unused]],
PaddedPODArray<Int8> & compare_results,
int nan_direction_hint)
{
size_t num_rows = lhs.size();
if (compare_results.empty())
compare_results.resize(num_rows);
else if (compare_results.size() != num_rows)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Size of compare_results: {} doesn't match rows_num: {}",
compare_results.size(),
num_rows);
for (size_t row = 0; row < num_rows; ++row)
{
int res = lhs.compareAt(row, rhs_row_num, rhs, nan_direction_hint);
assert(res == 1 || res == -1 || res == 0);
compare_results[row] = static_cast<Int8>(res);
if constexpr (reversed)
compare_results[row] = -compare_results[row];
}
}
template <typename Derived, bool reversed>
void compareWithIndexImpl(
const Derived & lhs,
const Derived & rhs,
size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes [[maybe_unused]],
PaddedPODArray<Int8> & compare_results,
int nan_direction_hint)
{
size_t num_rows = lhs.size();
if (compare_results.empty())
compare_results.resize(num_rows);
else if (compare_results.size() != num_rows)
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Size of compare_results: {} doesn't match rows_num: {}",
compare_results.size(),
num_rows);
UInt64 * next_index = row_indexes->data();
for (auto row : *row_indexes)
{
int res = lhs.compareAt(row, rhs_row_num, rhs, nan_direction_hint);
assert(res == 1 || res == -1 || res == 0);
compare_results[row] = static_cast<Int8>(res);
if constexpr (reversed)
compare_results[row] = -compare_results[row];
if (compare_results[row] == 0)
{
*next_index = row;
++next_index;
}
}
size_t equal_row_indexes_size = next_index - row_indexes->data();
row_indexes->resize(equal_row_indexes_size);
}
template <typename Derived, typename Parent>
void IColumnHelper<Derived, Parent>::compareColumn(
const IColumn & rhs_base,
size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes,
PaddedPODArray<Int8> & compare_results,
int direction,
int nan_direction_hint) const
{
const auto & lhs = static_cast<const Derived &>(*this);
const auto & rhs = static_cast<const Derived &>(rhs_base);
if (direction < 0)
{
if (row_indexes)
compareWithIndexImpl<Derived, true>(lhs, rhs, rhs_row_num, row_indexes, compare_results, nan_direction_hint);
else
compareImpl<Derived, true>(lhs, rhs, rhs_row_num, row_indexes, compare_results, nan_direction_hint);
}
else if (row_indexes)
{
compareWithIndexImpl<Derived, false>(lhs, rhs, rhs_row_num, row_indexes, compare_results, nan_direction_hint);
}
else
{
compareImpl<Derived, false>(lhs, rhs, rhs_row_num, row_indexes, compare_results, nan_direction_hint);
}
}
template <typename Derived, typename Parent>
bool IColumnHelper<Derived, Parent>::hasEqualValues() const
{
const auto & self = static_cast<const Derived &>(*this);
size_t num_rows = self.size();
for (size_t i = 1; i < num_rows; ++i)
{
if (self.compareAt(i, 0, self, 0) != 0)
return false;
}
return true;
}
template <typename Derived, typename Parent>
double IColumnHelper<Derived, Parent>::getRatioOfDefaultRows(double sample_ratio) const
{
if (sample_ratio <= 0.0 || sample_ratio > 1.0)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Value of 'sample_ratio' must be in interval (0.0; 1.0], but got: {}", sample_ratio);
static constexpr auto max_number_of_rows_for_full_search = 1000;
const auto & self = static_cast<const Derived &>(*this);
size_t num_rows = self.size();
size_t num_sampled_rows = std::min(static_cast<size_t>(num_rows * sample_ratio), num_rows);
size_t num_checked_rows = 0;
size_t res = 0;
if (num_sampled_rows == num_rows || num_rows <= max_number_of_rows_for_full_search)
{
for (size_t i = 0; i < num_rows; ++i)
res += self.isDefaultAt(i);
num_checked_rows = num_rows;
}
else if (num_sampled_rows != 0)
{
for (size_t i = 0; i < num_rows; ++i)
{
if (num_checked_rows * num_rows <= i * num_sampled_rows)
{
res += self.isDefaultAt(i);
++num_checked_rows;
}
}
}
if (num_checked_rows == 0)
return 0.0;
return static_cast<double>(res) / num_checked_rows;
}
template <typename Derived, typename Parent>
UInt64 IColumnHelper<Derived, Parent>::getNumberOfDefaultRows() const
{
const auto & self = static_cast<const Derived &>(*this);
UInt64 res = 0;
size_t num_rows = self.size();
for (size_t i = 0; i < num_rows; ++i)
res += self.isDefaultAt(i);
return res;
}
template <typename Derived, typename Parent>
void IColumnHelper<Derived, Parent>::getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const
{
const auto & self = static_cast<const Derived &>(*this);
size_t to = limit && from + limit < self.size() ? from + limit : self.size();
indices.reserve_exact(indices.size() + to - from);
for (size_t i = from; i < to; ++i)
{
if (!self.isDefaultAt(i))
indices.push_back(i);
}
}
template <typename Derived, typename Parent>
StringRef
IColumnHelper<Derived, Parent>::serializeValueIntoArenaWithNull(size_t n, Arena & arena, char const *& begin, const UInt8 * is_null) const
{
const auto & self = static_cast<const Derived &>(*this);
if (is_null)
{
char * memory;
if (is_null[n])
{
memory = arena.allocContinue(1, begin);
*memory = 1;
return {memory, 1};
}
size_t sz = self.byteSizeAt(n) + 1 /* null byte */;
memory = arena.allocContinue(sz, begin);
*memory = 0;
self.serializeValueIntoMemory(n, memory + 1);
return {memory, sz};
}
else
{
return self.serializeValueIntoArena(n, arena, begin);
}
}
template <typename Derived, typename Parent>
StringRef IColumnHelper<Derived, Parent>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
if constexpr (!std::is_base_of_v<ColumnFixedSizeHelper, Derived>)
return IColumn::serializeValueIntoArena(n, arena, begin);
const auto & self = static_cast<const Derived &>(*this);
size_t sz = self.byteSizeAt(n);
char * memory = arena.allocContinue(sz, begin);
self.serializeValueIntoMemory(n, memory);
return {memory, sz};
}
template <typename Derived, typename Parent>
char * IColumnHelper<Derived, Parent>::serializeValueIntoMemoryWithNull(size_t n, char * memory, const UInt8 * is_null) const
{
const auto & self = static_cast<const Derived &>(*this);
if (is_null)
{
*memory = is_null[n];
++memory;
if (is_null[n])
return memory;
}
return self.serializeValueIntoMemory(n, memory);
}
template <typename Derived, typename Parent>
char * IColumnHelper<Derived, Parent>::serializeValueIntoMemory(size_t n, char * memory) const
{
if constexpr (!std::is_base_of_v<ColumnFixedSizeHelper, Derived>)
return IColumn::serializeValueIntoMemory(n, memory);
const auto & self = static_cast<const Derived &>(*this);
auto raw_data = self.getDataAt(n);
memcpy(memory, raw_data.data, raw_data.size);
return memory + raw_data.size;
}
template <typename Derived, typename Parent>
void IColumnHelper<Derived, Parent>::collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const
{
if constexpr (!std::is_base_of_v<ColumnFixedSizeHelper, Derived>)
return IColumn::collectSerializedValueSizes(sizes, is_null);
const auto & self = static_cast<const Derived &>(*this);
size_t rows = self.size();
if (sizes.empty())
sizes.resize_fill(rows);
else if (sizes.size() != rows)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Size of sizes: {} doesn't match rows_num: {}. It is a bug", sizes.size(), rows);
if (rows == 0)
return;
size_t element_size = self.byteSizeAt(0);
if (is_null)
{
for (size_t i = 0; i < rows; ++i)
{
if (is_null[i])
++sizes[i];
else
sizes[i] += element_size + 1 /* null byte */;
}
}
else
{
for (auto & sz : sizes)
sz += element_size;
}
}
template class IColumnHelper<ColumnVector<UInt8>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<UInt16>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<UInt32>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<UInt64>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<UInt128>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<UInt256>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Int8>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Int16>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Int32>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Int64>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Int128>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Int256>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Float32>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<Float64>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<UUID>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<IPv4>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnVector<IPv6>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnDecimal<Decimal32>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnDecimal<Decimal64>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnDecimal<Decimal128>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnDecimal<Decimal256>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnDecimal<DateTime64>, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnFixedString, ColumnFixedSizeHelper>;
template class IColumnHelper<ColumnString, IColumn>;
template class IColumnHelper<ColumnLowCardinality, IColumn>;
template class IColumnHelper<ColumnNullable, IColumn>;
template class IColumnHelper<ColumnConst, IColumn>;
template class IColumnHelper<ColumnArray, IColumn>;
template class IColumnHelper<ColumnTuple, IColumn>;
template class IColumnHelper<ColumnMap, IColumn>;
template class IColumnHelper<ColumnSparse, IColumn>;
template class IColumnHelper<ColumnObject, IColumn>;
template class IColumnHelper<ColumnAggregateFunction, IColumn>;
template class IColumnHelper<ColumnFunction, IColumn>;
template class IColumnHelper<ColumnCompressed, IColumn>;
template class IColumnHelper<ColumnVariant, IColumn>;
template class IColumnHelper<IColumnDummy, IColumn>;
}

View File

@ -223,7 +223,38 @@ public:
* For example, to obtain unambiguous representation of Array of strings, strings data should be interleaved with their sizes.
* Parameter begin should be used with Arena::allocContinue.
*/
virtual StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const = 0;
virtual StringRef serializeValueIntoArena(size_t /* n */, Arena & /* arena */, char const *& /* begin */) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeValueIntoArena is not supported for {}", getName());
}
/// Same as above but serialize into already allocated continuous memory.
/// Return pointer to the end of the serialization data.
virtual char * serializeValueIntoMemory(size_t /* n */, char * /* memory */) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeValueIntoMemory is not supported for {}", getName());
}
/// Nullable variant to avoid calling virtualized method inside ColumnNullable.
virtual StringRef
serializeValueIntoArenaWithNull(size_t /* n */, Arena & /* arena */, char const *& /* begin */, const UInt8 * /* is_null */) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeValueIntoArenaWithNull is not supported for {}", getName());
}
virtual char * serializeValueIntoMemoryWithNull(size_t /* n */, char * /* memory */, const UInt8 * /* is_null */) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method serializeValueIntoMemoryWithNull is not supported for {}", getName());
}
/// Calculate all the sizes of serialized data in column, then added to `sizes`.
/// If `is_null` is not nullptr, also take null bit into account.
/// This is currently used to facilitate the allocation of memory for an entire continuous row
/// in a single step. For more details, refer to the HashMethodSerialized implementation.
virtual void collectSerializedValueSizes(PaddedPODArray<UInt64> & /* sizes */, const UInt8 * /* is_null */) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method collectSerializedValueSizes is not supported for {}", getName());
}
/// Deserializes a value that was serialized using IColumn::serializeValueIntoArena method.
/// Returns pointer to the position after the read data.
@ -574,43 +605,18 @@ public:
[[nodiscard]] String dumpStructure() const;
protected:
/// Template is to devirtualize calls to insertFrom method.
/// In derived classes (that use final keyword), implement scatter method as call to scatterImpl.
template <typename Derived>
std::vector<MutablePtr> scatterImpl(ColumnIndex num_columns, const Selector & selector) const;
template <typename Derived, bool reversed, bool use_indexes>
void compareImpl(const Derived & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes,
PaddedPODArray<Int8> & compare_results,
int nan_direction_hint) const;
template <typename Derived>
void doCompareColumn(const Derived & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes,
PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const;
template <typename Derived>
bool hasEqualValuesImpl() const;
/// Template is to devirtualize calls to 'isDefaultAt' method.
template <typename Derived>
double getRatioOfDefaultRowsImpl(double sample_ratio) const;
template <typename Derived>
UInt64 getNumberOfDefaultRowsImpl() const;
template <typename Derived>
void getIndicesOfNonDefaultRowsImpl(Offsets & indices, size_t from, size_t limit) const;
template <typename Compare, typename Sort, typename PartialSort>
void getPermutationImpl(size_t limit, Permutation & res, Compare compare,
Sort full_sort, PartialSort partial_sort) const;
void getPermutationImpl(size_t limit, Permutation & res, Compare compare, Sort full_sort, PartialSort partial_sort) const;
template <typename Compare, typename Equals, typename Sort, typename PartialSort>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_ranges, Compare compare, Equals equals,
Sort full_sort, PartialSort partial_sort) const;
void updatePermutationImpl(
size_t limit,
Permutation & res,
EqualRanges & equal_ranges,
Compare compare,
Equals equals,
Sort full_sort,
PartialSort partial_sort) const;
};
using ColumnPtr = IColumn::Ptr;
@ -667,4 +673,47 @@ bool isColumnNullable(const IColumn & column);
/// True if column's is ColumnNullable or ColumnLowCardinality with nullable nested column.
bool isColumnNullableOrLowCardinalityNullable(const IColumn & column);
/// Implement methods to devirtualize some calls of IColumn in final descendents.
/// `typename Parent` is needed because some columns don't inherit IColumn directly.
/// See ColumnFixedSizeHelper for example.
template <typename Derived, typename Parent = IColumn>
class IColumnHelper : public Parent
{
/// Devirtualize insertFrom.
MutableColumns scatter(IColumn::ColumnIndex num_columns, const IColumn::Selector & selector) const override;
/// Devirtualize insertFrom and insertRangeFrom.
void gather(ColumnGathererStream & gatherer) override;
/// Devirtualize compareAt.
void compareColumn(
const IColumn & rhs_base,
size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes,
PaddedPODArray<Int8> & compare_results,
int direction,
int nan_direction_hint) const override;
/// Devirtualize compareAt.
bool hasEqualValues() const override;
/// Devirtualize isDefaultAt.
double getRatioOfDefaultRows(double sample_ratio) const override;
/// Devirtualize isDefaultAt.
UInt64 getNumberOfDefaultRows() const override;
/// Devirtualize isDefaultAt.
void getIndicesOfNonDefaultRows(IColumn::Offsets & indices, size_t from, size_t limit) const override;
/// Devirtualize byteSizeAt.
void collectSerializedValueSizes(PaddedPODArray<UInt64> & sizes, const UInt8 * is_null) const override;
/// Move common implementations into the same translation unit to ensure they are properly inlined.
char * serializeValueIntoMemoryWithNull(size_t n, char * memory, const UInt8 * is_null) const override;
StringRef serializeValueIntoArenaWithNull(size_t n, Arena & arena, char const *& begin, const UInt8 * is_null) const override;
char * serializeValueIntoMemory(size_t n, char * memory) const override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
};
}

View File

@ -35,7 +35,7 @@ bool IColumnDummy::isDefaultAt(size_t) const
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "isDefaultAt is not implemented for {}", getName());
}
StringRef IColumnDummy::serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin, const UInt8 *) const
StringRef IColumnDummy::serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin) const
{
/// Has to put one useless byte into Arena, because serialization into zero number of bytes is ambiguous.
char * res = arena.allocContinue(1, begin);

View File

@ -11,7 +11,7 @@ class Arena;
/** Base class for columns-constants that contain a value that is not in the `Field`.
* Not a full-fledged column and is used in a special way.
*/
class IColumnDummy : public IColumn
class IColumnDummy : public IColumnHelper<IColumnDummy>
{
public:
IColumnDummy() : s(0) {}
@ -49,8 +49,10 @@ public:
++s;
}
StringRef serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin, const UInt8 *) const override;
StringRef serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t /*n*/, SipHash & /*hash*/) const override

Some files were not shown because too many files have changed in this diff Show More