Merge remote-tracking branch 'origin/master' into pr-local-plan

Igor Nikonov 2024-06-05 06:46:56 +00:00
commit f39dbac9a0
385 changed files with 12665 additions and 7500 deletions

View File

@ -10,3 +10,11 @@ assignees: ''
> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in [community Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse
> If you still prefer GitHub issues, remove all this text and ask your question here.
**Company or project name**
Put your company name or project description here
**Question**
Your question

View File

@ -9,6 +9,10 @@ assignees: ''
> (you don't have to strictly follow this form)
**Company or project name**
> Put your company name or project description here
**Use case**
> A clear and concise description of what the intended usage scenario is.

View File

@ -9,6 +9,10 @@ assignees: ''
(you don't have to strictly follow this form)
**Company or project name**
Put your company name or project description here
**Describe the unexpected behaviour**
A clear and concise description of what does not work as it is supposed to.

View File

@ -9,6 +9,10 @@ assignees: ''
(you don't have to strictly follow this form)
**Company or project name**
Put your company name or project description here
**Describe the unexpected behaviour**
A clear and concise description of what does not work as it is supposed to.

View File

@ -9,6 +9,9 @@ assignees: ''
(you don't have to strictly follow this form)
**Company or project name**
Put your company name or project description here
**Describe the issue**
A clear and concise description of what does not work as it is supposed to.

View File

@ -9,6 +9,10 @@ assignees: ''
> Make sure that `git diff` result is empty and you've just pulled fresh master. Try cleaning up cmake cache. Just in case, official build instructions are published here: https://clickhouse.com/docs/en/development/build/
**Company or project name**
> Put your company name or project description here
**Operating system**
> OS kind or distribution, specific version/release, non-standard kernel if any. If you are trying to build inside a virtual machine, please mention it too.

View File

@ -8,6 +8,9 @@ labels: comp-documentation
(you don't have to strictly follow this form)
**Company or project name**
Put your company name or project description here
**Describe the issue**
A clear and concise description of what's wrong in documentation.

View File

@ -9,6 +9,9 @@ assignees: ''
(you don't have to strictly follow this form)
**Company or project name**
Put your company name or project description here
**Describe the situation**
What exactly works slower than expected?

View File

@ -9,6 +9,9 @@ assignees: ''
(you don't have to strictly follow this form)
**Company or project name**
Put your company name or project description here
**Describe the issue**
A clear and concise description of what does not work as it is supposed to.

View File

@ -11,6 +11,10 @@ assignees: ''
> You have to provide the following information whenever possible.
**Company or project name**
> Put your company name or project description here
**Describe what's wrong**
> A clear and concise description of what does not work as it is supposed to.

View File

@ -7,6 +7,10 @@ assignees: ''
---
**Company or project name**
Put your company name or project description here
**I have tried the following solutions**: https://clickhouse.com/docs/en/faq/troubleshooting/#troubleshooting-installation-errors
**Installation type**

View File

@ -58,7 +58,7 @@ jobs:
env:
GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}${{ fromJson(inputs.data).jobs_data.jobs_params[inputs.test_name].num_batches > 1 && format('-{0}',matrix.batch) || '' }}
strategy:
fail-fast: false # we always wait for entire matrix
fail-fast: false # we always wait for the entire matrix
matrix:
batch: ${{ fromJson(inputs.data).jobs_data.jobs_params[inputs.test_name].batches }}
steps:
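For illustration only (not part of the diff): the matrix above expands over the `batches` array inside `inputs.data`, and `GITHUB_JOB_OVERRIDDEN` gets the batch number appended whenever `num_batches > 1`. A hypothetical shape of that JSON (the job name is made up), inspected with jq:

data='{"jobs_data":{"jobs_params":{"Stateless tests (asan)":{"num_batches":2,"batches":[0,1]}}}}'
# two matrix runs are spawned for this job, and GITHUB_JOB_OVERRIDDEN becomes
# "Stateless tests (asan)-0" and "Stateless tests (asan)-1"
echo "$data" | jq '.jobs_data.jobs_params["Stateless tests (asan)"].batches'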

.gitignore vendored
View File

@ -21,6 +21,9 @@
*.stderr
*.stdout
# llvm-xray logs
xray-log.*
/docs/build
/docs/publish
/docs/edit

View File

@ -1,29 +0,0 @@
### CI modificators (add a leading space to apply) ###
## To avoid a merge commit in CI:
#no_merge_commit
## To discard CI cache:
#no_ci_cache
## To not test (only style check):
#do_not_test
## To run specified set of tests in CI:
#ci_set_<SET_NAME>
#ci_set_reduced
#ci_set_arm
#ci_set_integration
#ci_set_old_analyzer
## To run specified job in CI:
#job_<JOB NAME>
#job_stateless_tests_release
#job_package_debug
#job_integration_tests_asan
## To run only specified batches for multi-batch job(s)
#batch_2
#batch_1_2_3
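By way of illustration (not part of the diff): per the heading above, a tag only took effect when written with a leading space, e.g. a single line like the following placed wherever the CI reads these modificators from (the exact location is not specified here):

 #ci_set_integration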

View File

@ -11,8 +11,8 @@
### <a id="245"></a> ClickHouse release 24.5, 2024-05-30
#### Backward Incompatible Change
* Renamed "inverted indexes" to "full-text indexes" which is a less technical / more user-friendly name. This also changes internal table metadata and breaks tables with existing (experimental) inverted indexes. Please make to drop such indexes before upgrade and re-create them after upgrade. [#62884](https://github.com/ClickHouse/ClickHouse/pull/62884) ([Robert Schulze](https://github.com/rschu1ze)).
* Usage of functions `neighbor`, `runningAccumulate`, `runningDifferenceStartingWithFirstValue`, `runningDifference` deprecated (because it is error-prone). Proper window functions should be used instead. To enable them back, set `allow_deprecated_functions = 1` or set `compatibility = '24.4'` or lower. [#63132](https://github.com/ClickHouse/ClickHouse/pull/63132) ([Nikita Taranov](https://github.com/nickitat)).
* Renamed "inverted indexes" to "full-text indexes" which is a less technical / more user-friendly name. This also changes internal table metadata and breaks tables with existing (experimental) inverted indexes. Please make sure to drop such indexes before upgrade and re-create them after upgrade. [#62884](https://github.com/ClickHouse/ClickHouse/pull/62884) ([Robert Schulze](https://github.com/rschu1ze)).
* Usage of functions `neighbor`, `runningAccumulate`, `runningDifferenceStartingWithFirstValue`, `runningDifference` deprecated (because it is error-prone). Proper window functions should be used instead. To enable them back, set `allow_deprecated_error_prone_window_functions = 1` or set `compatibility = '24.4'` or lower. [#63132](https://github.com/ClickHouse/ClickHouse/pull/63132) ([Nikita Taranov](https://github.com/nickitat)).
* Queries from `system.columns` will work faster if there is a large number of columns, but many databases or tables are not granted for `SHOW TABLES`. Note that in previous versions, if you grant `SHOW COLUMNS` to individual columns without granting `SHOW TABLES` to the corresponding tables, the `system.columns` table will show these columns, but in a new version, it will skip the table entirely. Remove trace log messages "Access granted" and "Access denied" that slowed down queries. [#63439](https://github.com/ClickHouse/ClickHouse/pull/63439) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### New Feature

View File

@ -122,6 +122,8 @@ add_library(global-libs INTERFACE)
include (cmake/sanitize.cmake)
include (cmake/instrument.cmake)
option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON)
set (CMAKE_COLOR_MAKEFILE ${ENABLE_COLORED_BUILD}) # works only for the makefile generator
@ -208,8 +210,6 @@ option(OMIT_HEAVY_DEBUG_SYMBOLS
"Do not generate debugger info for heavy modules (ClickHouse functions and dictionaries, some contrib)"
${OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT})
option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS})
option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF)
if (NOT BUILD_STANDALONE_KEEPER)
option(CREATE_KEEPER_SYMLINK "Create symlink for clickhouse-keeper to main server binary" ON)

View File

@ -2,20 +2,22 @@
the file is autogenerated by utils/security-generator/generate_security.py
-->
# Security Policy
# ClickHouse Security Vulnerability Response Policy
## Security Announcements
Security fixes will be announced by posting them in the [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/).
## Security Change Log and Support
## Scope and Supported Versions
Details regarding security fixes are publicly reported in our [security changelog](https://clickhouse.com/docs/en/whats-new/security-changelog/). A summary of known security vulnerabilities is shown at the bottom of this page.
The following versions of ClickHouse server are currently being supported with security updates:
Vulnerability notifications pre-release or during embargo periods are available to open source users and support customers registered for vulnerability alerts. Refer to our [Embargo Policy](#embargo-policy) below.
The following versions of ClickHouse server are currently supported with security updates:
| Version | Supported |
|:-|:-|
| 24.5 | ✔️ |
| 24.4 | ✔️ |
| 24.3 | ✔️ |
| 24.2 | ✔️ |
| 24.2 | ❌ |
| 24.1 | ❌ |
| 23.* | ❌ |
| 23.8 | ✔️ |
@ -37,7 +39,7 @@ The following versions of ClickHouse server are currently being supported with s
We're extremely grateful for security researchers and users that report vulnerabilities to the ClickHouse Open Source Community. All reports are thoroughly investigated by developers.
To report a potential vulnerability in ClickHouse please send the details about it to [security@clickhouse.com](mailto:security@clickhouse.com). We do not offer any financial rewards for reporting issues to us using this method. Alternatively, you can also submit your findings through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement.
To report a potential vulnerability in ClickHouse please send the details about it through our public bug bounty program hosted by [Bugcrowd](https://bugcrowd.com/clickhouse) and be rewarded for it as per the program scope and rules of engagement.
### When Should I Report a Vulnerability?
@ -59,3 +61,21 @@ As the security issue moves from triage, to identified fix, to release planning
A public disclosure date is negotiated by the ClickHouse maintainers and the bug submitter. We prefer to fully disclose the bug as soon as possible once a user mitigation is available. It is reasonable to delay disclosure when the bug or the fix is not yet fully understood, the solution is not well-tested, or for vendor coordination. The timeframe for disclosure is from immediate (especially if it's already publicly known) to 90 days. For a vulnerability with a straightforward mitigation, we expect the time from report date to disclosure date to be on the order of 7 days.
## Embargo Policy
Open source users and support customers may subscribe to receive alerts during the embargo period by visiting [https://trust.clickhouse.com/?product=clickhouseoss](https://trust.clickhouse.com/?product=clickhouseoss), requesting access and subscribing for alerts. Subscribers agree not to make these notifications public, issue communications, share this information with others, or issue public patches before the disclosure date. Accidental disclosures must be reported immediately to trust@clickhouse.com. Failure to follow this policy or repeated leaks may result in removal from the subscriber list.
Participation criteria:
1. Be a current open source user or support customer with a valid corporate email domain (no @gmail.com, @azure.com, etc.).
1. Sign up to the ClickHouse OSS Trust Center at [https://trust.clickhouse.com](https://trust.clickhouse.com).
1. Accept the ClickHouse Security Vulnerability Response Policy as outlined above.
1. Subscribe to ClickHouse OSS Trust Center alerts.
Removal criteria:
1. Members may be removed for failure to follow this policy or repeated leaks.
1. Members may be removed for bounced messages (mail delivery failure).
1. Members may unsubscribe at any time.
Notification process:
ClickHouse will post notifications within our OSS Trust Center and notify subscribers. Subscribers must log in to the Trust Center to download the notification. The notification will include the timeframe for public disclosure.

View File

@ -34,15 +34,6 @@ set (SRCS
throwError.cpp
)
if (USE_DEBUG_HELPERS)
get_target_property(MAGIC_ENUM_INCLUDE_DIR ch_contrib::magic_enum INTERFACE_INCLUDE_DIRECTORIES)
# CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc.
# Prefixing "SHELL:" will force it to use the original text.
set (INCLUDE_DEBUG_HELPERS "SHELL:-I\"${MAGIC_ENUM_INCLUDE_DIR}\" -include \"${ClickHouse_SOURCE_DIR}/base/base/iostream_debug_helpers.h\"")
# Use generator expression as we don't want to pollute CMAKE_CXX_FLAGS, which will interfere with CMake check system.
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${INCLUDE_DEBUG_HELPERS}>)
endif ()
add_library (common ${SRCS})
if (WITH_COVERAGE)

View File

@ -1,187 +0,0 @@
#pragma once
#include "demangle.h"
#include "getThreadId.h"
#include <type_traits>
#include <tuple>
#include <iomanip>
#include <iostream>
#include <magic_enum.hpp>
/** Usage:
*
* DUMP(variable...)
*/
template <typename Out, typename T>
Out & dumpValue(Out &, T &&);
/// Catch-all case.
template <int priority, typename Out, typename T>
requires(priority == -1)
Out & dumpImpl(Out & out, T &&) // NOLINT(cppcoreguidelines-missing-std-forward)
{
return out << "{...}";
}
/// An object, that could be output with operator <<.
template <int priority, typename Out, typename T>
requires(priority == 0)
Out & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::declval<Out &>() << std::declval<T>())> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
{
return out << x;
}
/// A pointer-like object.
template <int priority, typename Out, typename T>
requires(priority == 1
/// Protect from the case when operator * do effectively nothing (function pointer).
&& !std::is_same_v<std::decay_t<T>, std::decay_t<decltype(*std::declval<T>())>>)
Out & dumpImpl(Out & out, T && x, std::decay_t<decltype(*std::declval<T>())> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
{
if (!x)
return out << "nullptr";
return dumpValue(out, *x);
}
/// Container.
template <int priority, typename Out, typename T>
requires(priority == 2)
Out & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::begin(std::declval<T>()))> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
{
bool first = true;
out << "{";
for (const auto & elem : x)
{
if (first)
first = false;
else
out << ", ";
dumpValue(out, elem);
}
return out << "}";
}
template <int priority, typename Out, typename T>
requires(priority == 3 && std::is_enum_v<std::decay_t<T>>)
Out & dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
{
return out << magic_enum::enum_name(x);
}
/// string and const char * - output not as container or pointer.
template <int priority, typename Out, typename T>
requires(priority == 3 && (std::is_same_v<std::decay_t<T>, std::string> || std::is_same_v<std::decay_t<T>, const char *>))
Out & dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
{
return out << std::quoted(x);
}
/// UInt8 - output as number, not char.
template <int priority, typename Out, typename T>
requires(priority == 3 && std::is_same_v<std::decay_t<T>, unsigned char>)
Out & dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
{
return out << int(x);
}
/// Tuple, pair
template <size_t N, typename Out, typename T>
Out & dumpTupleImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
{
if constexpr (N == 0)
out << "{";
else
out << ", ";
dumpValue(out, std::get<N>(x));
if constexpr (N + 1 == std::tuple_size_v<std::decay_t<T>>)
out << "}";
else
dumpTupleImpl<N + 1>(out, x);
return out;
}
template <int priority, typename Out, typename T>
requires(priority == 4)
Out & dumpImpl(Out & out, T && x, std::decay_t<decltype(std::get<0>(std::declval<T>()))> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward)
{
return dumpTupleImpl<0>(out, x);
}
template <int priority, typename Out, typename T>
Out & dumpDispatchPriorities(Out & out, T && x, std::decay_t<decltype(dumpImpl<priority>(std::declval<Out &>(), std::declval<T>()))> *) // NOLINT(cppcoreguidelines-missing-std-forward)
{
return dumpImpl<priority>(out, x);
}
// NOLINTNEXTLINE(google-explicit-constructor)
struct LowPriority { LowPriority(void *) {} };
template <int priority, typename Out, typename T>
Out & dumpDispatchPriorities(Out & out, T && x, LowPriority) // NOLINT(cppcoreguidelines-missing-std-forward)
{
return dumpDispatchPriorities<priority - 1>(out, x, nullptr);
}
template <typename Out, typename T>
Out & dumpValue(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
{
return dumpDispatchPriorities<5>(out, x, nullptr);
}
template <typename Out, typename T>
Out & dump(Out & out, const char * name, T && x) // NOLINT(cppcoreguidelines-missing-std-forward)
{
// Dumping string literal, printing name and demangled type is irrelevant.
if constexpr (std::is_same_v<const char *, std::decay_t<std::remove_reference_t<T>>>)
{
const auto name_len = strlen(name);
const auto value_len = strlen(x);
// `name` is the same as quoted `x`
if (name_len > 2 && value_len > 0 && name[0] == '"' && name[name_len - 1] == '"'
&& strncmp(name + 1, x, std::min(value_len, name_len) - 1) == 0)
return out << x;
}
out << demangle(typeid(x).name()) << " " << name << " = ";
return dumpValue(out, x) << "; ";
}
#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
#define DUMPVAR(VAR) ::dump(std::cerr, #VAR, (VAR));
#define DUMPHEAD std::cerr << __FILE__ << ':' << __LINE__ << " [ " << getThreadId() << " ] ";
#define DUMPTAIL std::cerr << '\n';
#define DUMP1(V1) do { DUMPHEAD DUMPVAR(V1) DUMPTAIL } while(0)
#define DUMP2(V1, V2) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPTAIL } while(0)
#define DUMP3(V1, V2, V3) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPTAIL } while(0)
#define DUMP4(V1, V2, V3, V4) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPTAIL } while(0)
#define DUMP5(V1, V2, V3, V4, V5) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPTAIL } while(0)
#define DUMP6(V1, V2, V3, V4, V5, V6) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPTAIL } while(0)
#define DUMP7(V1, V2, V3, V4, V5, V6, V7) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPTAIL } while(0)
#define DUMP8(V1, V2, V3, V4, V5, V6, V7, V8) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPTAIL } while(0)
#define DUMP9(V1, V2, V3, V4, V5, V6, V7, V8, V9) do { DUMPHEAD DUMPVAR(V1) DUMPVAR(V2) DUMPVAR(V3) DUMPVAR(V4) DUMPVAR(V5) DUMPVAR(V6) DUMPVAR(V7) DUMPVAR(V8) DUMPVAR(V9) DUMPTAIL } while(0)
/// https://groups.google.com/forum/#!searchin/kona-dev/variadic$20macro%7Csort:date/kona-dev/XMA-lDOqtlI/GCzdfZsD41sJ
#define VA_NUM_ARGS_IMPL(x1, x2, x3, x4, x5, x6, x7, x8, x9, N, ...) N
#define VA_NUM_ARGS(...) VA_NUM_ARGS_IMPL(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1)
#define MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS) PREFIX ## NUM_ARGS
#define MAKE_VAR_MACRO_IMPL(PREFIX, NUM_ARGS) MAKE_VAR_MACRO_IMPL_CONCAT(PREFIX, NUM_ARGS)
#define MAKE_VAR_MACRO(PREFIX, ...) MAKE_VAR_MACRO_IMPL(PREFIX, VA_NUM_ARGS(__VA_ARGS__))
#define DUMP(...) MAKE_VAR_MACRO(DUMP, __VA_ARGS__)(__VA_ARGS__)

View File

@ -1,2 +0,0 @@
clickhouse_add_executable (dump_variable dump_variable.cpp)
target_link_libraries (dump_variable PRIVATE clickhouse_common_io)

View File

@ -1,70 +0,0 @@
#include <base/iostream_debug_helpers.h>
#include <iostream>
#include <memory>
#include <vector>
#include <map>
#include <set>
#include <tuple>
#include <array>
#include <utility>
struct S1;
struct S2 {};
struct S3
{
std::set<const char *> m1;
};
std::ostream & operator<<(std::ostream & stream, const S3 & what)
{
stream << "S3 {m1=";
dumpValue(stream, what.m1) << "}";
return stream;
}
int main(int, char **)
{
int x = 1;
DUMP(x);
DUMP(x, 1, &x);
DUMP(std::make_unique<int>(1));
DUMP(std::make_shared<int>(1));
std::vector<int> vec{1, 2, 3};
DUMP(vec);
auto pair = std::make_pair(1, 2);
DUMP(pair);
auto tuple = std::make_tuple(1, 2, 3);
DUMP(tuple);
std::map<int, std::string> map{{1, "hello"}, {2, "world"}};
DUMP(map);
std::initializer_list<const char *> list{"hello", "world"};
DUMP(list);
std::array<const char *, 2> arr{{"hello", "world"}};
DUMP(arr);
//DUMP([]{});
S1 * s = nullptr;
DUMP(s);
DUMP(S2());
std::set<const char *> variants = {"hello", "world"};
DUMP(variants);
S3 s3 {{"hello", "world"}};
DUMP(s3);
return 0;
}

cmake/instrument.cmake Normal file
View File

@ -0,0 +1,20 @@
# https://llvm.org/docs/XRay.html
option (ENABLE_XRAY "Enable LLVM XRay" OFF)
if (NOT ENABLE_XRAY)
message (STATUS "Not using LLVM XRay")
return()
endif()
if (NOT (ARCH_AMD64 AND (OS_LINUX OR OS_FREEBSD)))
message (STATUS "Not using LLVM XRay, only amd64 Linux or FreeBSD are supported")
return()
endif()
# The target clang must support xray, otherwise it should error on invalid option
set (XRAY_FLAGS "-fxray-instrument -DUSE_XRAY")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${XRAY_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${XRAY_FLAGS}")
message (STATUS "Using LLVM XRay")

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.4.1.2088"
ARG VERSION="24.5.1.1763"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.4.1.2088"
ARG VERSION="24.5.1.1763"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -28,7 +28,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.4.1.2088"
ARG VERSION="24.5.1.1763"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
#docker-official-library:off

View File

@ -15,7 +15,6 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
file \
libxml2-utils \
moreutils \
python3-fuzzywuzzy \
python3-pip \
yamllint \
locales \
@ -23,8 +22,18 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# python-magic is the same version as in Ubuntu 22.04
RUN pip3 install black==23.12.0 boto3 codespell==2.2.1 mypy==1.8.0 PyGithub unidiff pylint==3.1.0 \
python-magic==0.4.24 requests types-requests \
RUN pip3 install \
PyGithub \
black==23.12.0 \
boto3 \
codespell==2.2.1 \
mypy==1.8.0 \
pylint==3.1.0 \
python-magic==0.4.24 \
requests \
thefuzz \
types-requests \
unidiff \
&& rm -rf /root/.cache/pip
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8

View File

@ -65,46 +65,22 @@ function save_settings_clean()
script -q -c "clickhouse-local -q \"select * from system.settings into outfile '$out'\"" --log-out /dev/null
}
# We save the (numeric) version of the old server to compare setting changes between the 2
# We do this since we are testing against the latest release, not taking into account release candidates, so we might
# be testing current master (24.6) against the latest stable release (24.4)
function save_major_version()
{
local out=$1 && shift
clickhouse-local -q "SELECT a[1]::UInt64 * 100 + a[2]::UInt64 as v FROM (Select splitByChar('.', version()) as a) into outfile '$out'"
}
save_settings_clean 'old_settings.native'
save_major_version 'old_version.native'
# Initial run without S3 to create system.*_log on local file system to make it
# available for dump via clickhouse-local
configure
function remove_keeper_config()
{
sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml
}
# async_replication setting doesn't exist on some older versions
remove_keeper_config "async_replication" "1"
# create_if_not_exists feature flag doesn't exist on some older versions
remove_keeper_config "create_if_not_exists" "[01]"
#todo: remove these after 24.3 released.
sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml
#todo: remove these after 24.3 released.
sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml
# latest_logs_cache_size_threshold setting doesn't exist on some older versions
remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+"
# commit_logs_cache_size_threshold setting doesn't exist on some older versions
remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+"
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
rm /etc/clickhouse-server/config.d/backoff_failed_mutation.xml
rm /etc/clickhouse-server/config.d/handlers.yaml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
start
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
@ -116,44 +92,11 @@ export USE_S3_STORAGE_FOR_MERGE_TREE=1
export ZOOKEEPER_FAULT_INJECTION=0
configure
# force_sync=false doesn't work correctly on some older versions
sudo sed -i "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" /etc/clickhouse-server/config.d/keeper_port.xml
#todo: remove these after 24.3 released.
sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml
#todo: remove these after 24.3 released.
sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml
# async_replication setting doesn't exist on some older versions
remove_keeper_config "async_replication" "1"
# create_if_not_exists feature flag doesn't exist on some older versions
remove_keeper_config "create_if_not_exists" "[01]"
# latest_logs_cache_size_threshold setting doesn't exist on some older versions
remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+"
# commit_logs_cache_size_threshold setting doesn't exist on some older versions
remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+"
# But we still need default disk because some tables loaded only into it
sudo sed -i "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
rm /etc/clickhouse-server/config.d/backoff_failed_mutation.xml
rm /etc/clickhouse-server/config.d/handlers.yaml
rm /etc/clickhouse-server/config.d/block_number.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
start
clickhouse-client --query="SELECT 'Server version: ', version()"
@ -192,6 +135,7 @@ then
save_settings_clean 'new_settings.native'
clickhouse-local -nmq "
CREATE TABLE old_settings AS file('old_settings.native');
CREATE TABLE old_version AS file('old_version.native');
CREATE TABLE new_settings AS file('new_settings.native');
SELECT
@ -202,8 +146,11 @@ then
LEFT JOIN old_settings ON new_settings.name = old_settings.name
WHERE (new_settings.value != old_settings.value) AND (name NOT IN (
SELECT arrayJoin(tupleElement(changes, 'name'))
FROM system.settings_changes
WHERE version = extract(version(), '^(?:\\d+\\.\\d+)')
FROM
(
SELECT *, splitByChar('.', version) AS version_array FROM system.settings_changes
)
WHERE (version_array[1]::UInt64 * 100 + version_array[2]::UInt64) > (SELECT v FROM old_version LIMIT 1)
))
SETTINGS join_use_nulls = 1
INTO OUTFILE 'changed_settings.txt'
@ -216,8 +163,11 @@ then
FROM old_settings
)) AND (name NOT IN (
SELECT arrayJoin(tupleElement(changes, 'name'))
FROM system.settings_changes
WHERE version = extract(version(), '^(?:\\d+\\.\\d+)')
FROM
(
SELECT *, splitByChar('.', version) AS version_array FROM system.settings_changes
)
WHERE (version_array[1]::UInt64 * 100 + version_array[2]::UInt64) > (SELECT v FROM old_version LIMIT 1)
))
INTO OUTFILE 'new_settings.txt'
FORMAT PrettyCompactNoEscapes;
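As a quick illustration of the version encoding used above (major * 100 + minor), with a hypothetical version string in place of the old server's `version()` — a minimal standalone check:

clickhouse-local -q "SELECT a[1]::UInt64 * 100 + a[2]::UInt64 AS v FROM (SELECT splitByChar('.', '24.4.1.2088') AS a)"
# prints 2404; the script stores this value in old_version.native and uses it to
# filter system.settings_changes entries by version in the queries above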

View File

@ -0,0 +1,366 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.5.1.1763-stable (647c154a94d) FIXME as compared to v24.4.1.2088-stable (6d4b31322d1)
#### Backward Incompatible Change
* Renamed "inverted indexes" to "full-text indexes" which is a less technical / more user-friendly name. This also changes internal table metadata and breaks tables with existing (experimental) inverted indexes. Please make sure to drop such indexes before upgrade and re-create them after upgrade. [#62884](https://github.com/ClickHouse/ClickHouse/pull/62884) ([Robert Schulze](https://github.com/rschu1ze)).
* Usage of functions `neighbor`, `runningAccumulate`, `runningDifferenceStartingWithFirstValue`, `runningDifference` deprecated (because it is error-prone). Proper window functions should be used instead. To enable them back, set `allow_deprecated_functions=1`. [#63132](https://github.com/ClickHouse/ClickHouse/pull/63132) ([Nikita Taranov](https://github.com/nickitat)).
* Queries from `system.columns` will work faster if there is a large number of columns, but many databases or tables are not granted for `SHOW TABLES`. Note that in previous versions, if you grant `SHOW COLUMNS` to individual columns without granting `SHOW TABLES` to the corresponding tables, the `system.columns` table will show these columns, but in a new version, it will skip the table entirely. Remove trace log messages "Access granted" and "Access denied" that slowed down queries. [#63439](https://github.com/ClickHouse/ClickHouse/pull/63439) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### New Feature
* Provide support for AzureBlobStorage function in ClickHouse server to use Azure Workload identity to authenticate against Azure blob storage. If `use_workload_identity` parameter is set in config, [workload identity](https://github.com/Azure/azure-sdk-for-cpp/tree/main/sdk/identity/azure-identity#authenticate-azure-hosted-applications) is used for authentication. [#57881](https://github.com/ClickHouse/ClickHouse/pull/57881) ([Vinay Suryadevara](https://github.com/vinay92-ch)).
* Introduce bulk loading to StorageEmbeddedRocksDB by creating and ingesting SST files instead of relying on the RocksDB built-in memtable. This helps to increase importing speed, especially for long-running insert queries to StorageEmbeddedRocksDB tables. Also, introduce `StorageEmbeddedRocksDB` table settings. [#59163](https://github.com/ClickHouse/ClickHouse/pull/59163) ([Duc Canh Le](https://github.com/canhld94)).
* User can now parse CRLF with TSV format using a setting `input_format_tsv_crlf_end_of_line`. Closes [#56257](https://github.com/ClickHouse/ClickHouse/issues/56257). [#59747](https://github.com/ClickHouse/ClickHouse/pull/59747) ([Shaun Struwig](https://github.com/Blargian)).
* Adds the Form Format to read/write a single record in the application/x-www-form-urlencoded format. [#60199](https://github.com/ClickHouse/ClickHouse/pull/60199) ([Shaun Struwig](https://github.com/Blargian)).
* Added possibility to compress in CROSS JOIN. [#60459](https://github.com/ClickHouse/ClickHouse/pull/60459) ([p1rattttt](https://github.com/p1rattttt)).
* New setting `input_format_force_null_for_omitted_fields` that forces NULL values for omitted fields. [#60887](https://github.com/ClickHouse/ClickHouse/pull/60887) ([Constantine Peresypkin](https://github.com/pkit)).
* Support JOIN with inequality conditions which involve columns from both the left and right tables, e.g. `t1.y < t2.y`. To enable, `SET allow_experimental_join_condition = 1`. [#60920](https://github.com/ClickHouse/ClickHouse/pull/60920) ([lgbo](https://github.com/lgbo-ustc)).
* Previously, our S3 storage and the s3 table function didn't support selecting from archive files. Now it is possible to iterate over files inside archives in S3. [#62259](https://github.com/ClickHouse/ClickHouse/pull/62259) ([Daniil Ivanik](https://github.com/divanik)).
* Support for conditional function `clamp`. [#62377](https://github.com/ClickHouse/ClickHouse/pull/62377) ([skyoct](https://github.com/skyoct)).
* Add npy output format. [#62430](https://github.com/ClickHouse/ClickHouse/pull/62430) ([豪肥肥](https://github.com/HowePa)).
* Added SQL functions `generateUUIDv7`, `generateUUIDv7ThreadMonotonic`, `generateUUIDv7NonMonotonic` (with different monotonicity/performance trade-offs) to generate version 7 UUIDs aka. timestamp-based UUIDs with random component. Also added a new function `UUIDToNum` to extract bytes from a UUID and a new function `UUIDv7ToDateTime` to extract timestamp component from a UUID version 7. [#62852](https://github.com/ClickHouse/ClickHouse/pull/62852) ([Alexey Petrunyaka](https://github.com/pet74alex)).
* Backported in [#64307](https://github.com/ClickHouse/ClickHouse/issues/64307): Implement Dynamic data type that allows to store values of any type inside it without knowing all of them in advance. Dynamic type is available under a setting `allow_experimental_dynamic_type`. Reference: [#54864](https://github.com/ClickHouse/ClickHouse/issues/54864). [#63058](https://github.com/ClickHouse/ClickHouse/pull/63058) ([Kruglov Pavel](https://github.com/Avogar)).
* Introduce bulk loading to StorageEmbeddedRocksDB by creating and ingesting SST files instead of relying on the RocksDB built-in memtable. This helps to increase importing speed, especially for long-running insert queries to StorageEmbeddedRocksDB tables. Also, introduce StorageEmbeddedRocksDB table settings. [#63324](https://github.com/ClickHouse/ClickHouse/pull/63324) ([Duc Canh Le](https://github.com/canhld94)).
* Raw as a synonym for TSVRaw. [#63394](https://github.com/ClickHouse/ClickHouse/pull/63394) ([Unalian](https://github.com/Unalian)).
* Added possibility to do cross join in temporary file if size exceeds limits. [#63432](https://github.com/ClickHouse/ClickHouse/pull/63432) ([p1rattttt](https://github.com/p1rattttt)).
* On Linux and MacOS, if the program has STDOUT redirected to a file with a compression extension, use the corresponding compression method instead of nothing (making it behave similarly to `INTO OUTFILE` ). [#63662](https://github.com/ClickHouse/ClickHouse/pull/63662) ([v01dXYZ](https://github.com/v01dXYZ)).
* Change warning on high number of attached tables to differentiate tables, views and dictionaries. [#64180](https://github.com/ClickHouse/ClickHouse/pull/64180) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
#### Performance Improvement
* Skip merging of newly created projection blocks during `INSERT`-s. [#59405](https://github.com/ClickHouse/ClickHouse/pull/59405) ([Nikita Taranov](https://github.com/nickitat)).
* Process string functions XXXUTF8 'asciily' if input strings are all ascii chars. Inspired by https://github.com/apache/doris/pull/29799. Overall speed up by 1.07x~1.62x. Notice that peak memory usage had been decreased in some cases. [#61632](https://github.com/ClickHouse/ClickHouse/pull/61632) ([李扬](https://github.com/taiyang-li)).
* Improved performance of selection (`{}`) globs in StorageS3. [#62120](https://github.com/ClickHouse/ClickHouse/pull/62120) ([Andrey Zvonov](https://github.com/zvonand)).
* HostResolver held each IP address several times. If a remote host has several IPs and for some reason (firewall rules, for example) access to some IPs is allowed while it is forbidden to others, then only the first record of the forbidden IPs was marked as failed, and on each retry these IPs had a chance to be chosen (and fail again). Even with that fixed, the DNS cache was dropped every 120 seconds, and the IPs could be chosen again. [#62652](https://github.com/ClickHouse/ClickHouse/pull/62652) ([Anton Ivashkin](https://github.com/ianton-ru)).
* Add a new configuration `prefer_merge_sort_block_bytes` to control the memory usage and speed up sorting by 2 times when merging, when there are many columns. [#62904](https://github.com/ClickHouse/ClickHouse/pull/62904) ([LiuNeng](https://github.com/liuneng1994)).
* `clickhouse-local` will start faster. In previous versions, it was not deleting temporary directories by mistake. Now it will. This closes [#62941](https://github.com/ClickHouse/ClickHouse/issues/62941). [#63074](https://github.com/ClickHouse/ClickHouse/pull/63074) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Micro-optimizations for the new analyzer. [#63429](https://github.com/ClickHouse/ClickHouse/pull/63429) ([Raúl Marín](https://github.com/Algunenano)).
* Index analysis will work if `DateTime` is compared to `DateTime64`. This closes [#63441](https://github.com/ClickHouse/ClickHouse/issues/63441). [#63443](https://github.com/ClickHouse/ClickHouse/pull/63443) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Index analysis will work if `DateTime` is compared to `DateTime64`. This closes [#63441](https://github.com/ClickHouse/ClickHouse/issues/63441). [#63532](https://github.com/ClickHouse/ClickHouse/pull/63532) ([Raúl Marín](https://github.com/Algunenano)).
* Speed up indices of type `set` a little (around 1.5 times) by removing garbage. [#64098](https://github.com/ClickHouse/ClickHouse/pull/64098) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Improvement
* Maps can now have `Float32`, `Float64`, `Array(T)`, `Map(K,V)` and `Tuple(T1, T2, ...)` as keys. Closes [#54537](https://github.com/ClickHouse/ClickHouse/issues/54537). [#59318](https://github.com/ClickHouse/ClickHouse/pull/59318) ([李扬](https://github.com/taiyang-li)).
* Multiline strings with border preservation and column width change. [#59940](https://github.com/ClickHouse/ClickHouse/pull/59940) ([Volodyachan](https://github.com/Volodyachan)).
* Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix a crash in asynchronous stack unwinding (such as when using the sampling query profiler) while interpreting debug info. This closes [#60460](https://github.com/ClickHouse/ClickHouse/issues/60460). [#60468](https://github.com/ClickHouse/ClickHouse/pull/60468) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Distinct error messages for the S3 'no key' error in the disk and storage cases. [#61108](https://github.com/ClickHouse/ClickHouse/pull/61108) ([Sema Checherinda](https://github.com/CheSema)).
* Less contention in filesystem cache (part 4). Allow to keep filesystem cache not filled to the limit by doing additional eviction in the background (controlled by `keep_free_space_size(elements)_ratio`). This allows to release pressure from space reservation for queries (on `tryReserve` method). Also this is done in a lock free way as much as possible, e.g. should not block normal cache usage. [#61250](https://github.com/ClickHouse/ClickHouse/pull/61250) ([Kseniia Sumarokova](https://github.com/kssenii)).
* The progress bar will work for trivial queries with LIMIT from `system.zeros`, `system.zeros_mt` (it already works for `system.numbers` and `system.numbers_mt`), and the `generateRandom` table function. As a bonus, if the total number of records is greater than the `max_rows_to_read` limit, it will throw an exception earlier. This closes [#58183](https://github.com/ClickHouse/ClickHouse/issues/58183). [#61823](https://github.com/ClickHouse/ClickHouse/pull/61823) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* YAML Merge Key support. [#62685](https://github.com/ClickHouse/ClickHouse/pull/62685) ([Azat Khuzhin](https://github.com/azat)).
* Enhance error message when non-deterministic function is used with Replicated source. [#62896](https://github.com/ClickHouse/ClickHouse/pull/62896) ([Grégoire Pineau](https://github.com/lyrixx)).
* Fix interserver secret for Distributed over Distributed from `remote`. [#63013](https://github.com/ClickHouse/ClickHouse/pull/63013) ([Azat Khuzhin](https://github.com/azat)).
* Allow using `clickhouse-local` and its shortcuts `clickhouse` and `ch` with a query or queries file as a positional argument. Examples: `ch "SELECT 1"`, `ch --param_test Hello "SELECT {test:String}"`, `ch query.sql`. This closes [#62361](https://github.com/ClickHouse/ClickHouse/issues/62361). [#63081](https://github.com/ClickHouse/ClickHouse/pull/63081) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Support configuration substitutions from YAML files. [#63106](https://github.com/ClickHouse/ClickHouse/pull/63106) ([Eduard Karacharov](https://github.com/korowa)).
* Add TTL information in system parts_columns table. [#63200](https://github.com/ClickHouse/ClickHouse/pull/63200) ([litlig](https://github.com/litlig)).
* Keep previous data in terminal after picking from skim suggestions. [#63261](https://github.com/ClickHouse/ClickHouse/pull/63261) ([FlameFactory](https://github.com/FlameFactory)).
* Width of fields is now calculated correctly, ignoring ANSI escape sequences. [#63270](https://github.com/ClickHouse/ClickHouse/pull/63270) ([Shaun Struwig](https://github.com/Blargian)).
* Enable plain_rewritable metadata for local and Azure (azure_blob_storage) object storages. [#63365](https://github.com/ClickHouse/ClickHouse/pull/63365) ([Julia Kartseva](https://github.com/jkartseva)).
* Support English-style Unicode quotes, e.g. “Hello”, world. This is questionable in general but helpful when you type your query in a word processor, such as Google Docs. This closes [#58634](https://github.com/ClickHouse/ClickHouse/issues/58634). [#63381](https://github.com/ClickHouse/ClickHouse/pull/63381) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allowed to create MaterializedMySQL database without connection to MySQL. [#63397](https://github.com/ClickHouse/ClickHouse/pull/63397) ([Kirill](https://github.com/kirillgarbar)).
* Remove copying data when writing to filesystem cache. [#63401](https://github.com/ClickHouse/ClickHouse/pull/63401) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Update the usage of error code `NUMBER_OF_ARGUMENTS_DOESNT_MATCH` by more accurate error codes when appropriate. [#63406](https://github.com/ClickHouse/ClickHouse/pull/63406) ([Yohann Jardin](https://github.com/yohannj)).
* `os_user` and `client_hostname` are now correctly set up for queries for command line suggestions in clickhouse-client. This closes [#63430](https://github.com/ClickHouse/ClickHouse/issues/63430). [#63433](https://github.com/ClickHouse/ClickHouse/pull/63433) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fixed tabulation from line numbering, correct handling of length when moving a line if the value has a tab, added tests. [#63493](https://github.com/ClickHouse/ClickHouse/pull/63493) ([Volodyachan](https://github.com/Volodyachan)).
* Add the `aggregate_function_group_array_has_limit_size` setting to support discarding data in some scenarios. [#63516](https://github.com/ClickHouse/ClickHouse/pull/63516) ([zhongyuankai](https://github.com/zhongyuankai)).
* Automatically mark a replica of Replicated database as lost and start recovery if some DDL task fails more than `max_retries_before_automatic_recovery` (100 by default) times in a row with the same error. Also, fixed a bug that could cause skipping DDL entries when an exception is thrown during an early stage of entry execution. [#63549](https://github.com/ClickHouse/ClickHouse/pull/63549) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Automatically correct `max_block_size=0` to default value. [#63587](https://github.com/ClickHouse/ClickHouse/pull/63587) ([Antonio Andelic](https://github.com/antonio2368)).
* Account failed files in `s3queue_tracked_file_ttl_sec` and `s3queue_traked_files_limit` for `StorageS3Queue`. [#63638](https://github.com/ClickHouse/ClickHouse/pull/63638) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add a build_id ALIAS column to trace_log to facilitate auto renaming upon detecting binary changes. This is to address [#52086](https://github.com/ClickHouse/ClickHouse/issues/52086). [#63656](https://github.com/ClickHouse/ClickHouse/pull/63656) ([Zimu Li](https://github.com/woodlzm)).
* Enable truncate operation for object storage disks. [#63693](https://github.com/ClickHouse/ClickHouse/pull/63693) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* The loading of the keywords list is now dependent on the server revision and will be disabled for the old versions of ClickHouse server. CC @azat. [#63786](https://github.com/ClickHouse/ClickHouse/pull/63786) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Allow trailing commas in the columns list in the INSERT query. For example, `INSERT INTO test (a, b, c, ) VALUES ...`. [#63803](https://github.com/ClickHouse/ClickHouse/pull/63803) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Better exception messages for the `Regexp` format. [#63804](https://github.com/ClickHouse/ClickHouse/pull/63804) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow trailing commas in the `Values` format. For example, this query is allowed: `INSERT INTO test (a, b, c) VALUES (4, 5, 6,);`. [#63810](https://github.com/ClickHouse/ClickHouse/pull/63810) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* ClickHouse disks now read the server setting to obtain the actual metadata format version. [#63831](https://github.com/ClickHouse/ClickHouse/pull/63831) ([Sema Checherinda](https://github.com/CheSema)).
* Disable pretty format restrictions (`output_format_pretty_max_rows`/`output_format_pretty_max_value_width`) when stdout is not TTY. [#63942](https://github.com/ClickHouse/ClickHouse/pull/63942) ([Azat Khuzhin](https://github.com/azat)).
* Exception handling now works when ClickHouse is used inside AWS Lambda. Author: [Alexey Coolnev](https://github.com/acoolnev). [#64014](https://github.com/ClickHouse/ClickHouse/pull/64014) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Throw `CANNOT_DECOMPRESS` instead of `CORRUPTED_DATA` on invalid compressed data passed via HTTP. [#64036](https://github.com/ClickHouse/ClickHouse/pull/64036) ([vdimir](https://github.com/vdimir)).
* A tip for a single large number in Pretty formats now works for Nullable and LowCardinality. This closes [#61993](https://github.com/ClickHouse/ClickHouse/issues/61993). [#64084](https://github.com/ClickHouse/ClickHouse/pull/64084) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Now backups with azure blob storage will use multicopy. [#64116](https://github.com/ClickHouse/ClickHouse/pull/64116) ([alesapin](https://github.com/alesapin)).
* Add metrics, logs, and thread names around parts filtering with indices. [#64130](https://github.com/ClickHouse/ClickHouse/pull/64130) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Allow to use native copy for azure even with different containers. [#64154](https://github.com/ClickHouse/ClickHouse/pull/64154) ([alesapin](https://github.com/alesapin)).
* Finally enable native copy for azure. [#64182](https://github.com/ClickHouse/ClickHouse/pull/64182) ([alesapin](https://github.com/alesapin)).
* Ignore `allow_suspicious_primary_key` on `ATTACH` and verify on `ALTER`. [#64202](https://github.com/ClickHouse/ClickHouse/pull/64202) ([Azat Khuzhin](https://github.com/azat)).
#### Build/Testing/Packaging Improvement
* ClickHouse is built with clang-18. A lot of new checks from clang-tidy-18 have been enabled. [#60469](https://github.com/ClickHouse/ClickHouse/pull/60469) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Re-enable broken s390x build in CI. [#63135](https://github.com/ClickHouse/ClickHouse/pull/63135) ([Harry Lee](https://github.com/HarryLeeIBM)).
* The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Information about every symbol in every translation unit will be collected in the CI database for every build in the CI. This closes [#63494](https://github.com/ClickHouse/ClickHouse/issues/63494). [#63495](https://github.com/ClickHouse/ClickHouse/pull/63495) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Experimentally support loongarch64 as a new platform for ClickHouse. [#63733](https://github.com/ClickHouse/ClickHouse/pull/63733) ([qiangxuhui](https://github.com/qiangxuhui)).
* Update Apache Datasketches library. It resolves [#63858](https://github.com/ClickHouse/ClickHouse/issues/63858). [#63923](https://github.com/ClickHouse/ClickHouse/pull/63923) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Enable GRPC support for aarch64 linux while cross-compiling binary. [#64072](https://github.com/ClickHouse/ClickHouse/pull/64072) ([alesapin](https://github.com/alesapin)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix making backup when multiple shards are used. This PR fixes [#56566](https://github.com/ClickHouse/ClickHouse/issues/56566). [#57684](https://github.com/ClickHouse/ClickHouse/pull/57684) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix passing projections/indexes from CREATE query into inner table of MV. [#59183](https://github.com/ClickHouse/ClickHouse/pull/59183) ([Azat Khuzhin](https://github.com/azat)).
* Fix boundRatio incorrect merge. [#60532](https://github.com/ClickHouse/ClickHouse/pull/60532) ([Tao Wang](https://github.com/wangtZJU)).
* Fix crash when using some functions with low-cardinality columns. [#61966](https://github.com/ClickHouse/ClickHouse/pull/61966) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix queries with FINAL giving a wrong result when the table does not use adaptive granularity. [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)).
* Improve the detection of cgroups v2 memory controller in unusual locations. This fixes a warning that the cgroup memory observer was disabled because no cgroups v1 or v2 current memory file could be found. [#62903](https://github.com/ClickHouse/ClickHouse/pull/62903) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix subsequent use of external tables in client. [#62964](https://github.com/ClickHouse/ClickHouse/pull/62964) ([Azat Khuzhin](https://github.com/azat)).
* Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)).
* Fix a bug which could lead to the server accepting connections before it is actually loaded. [#63181](https://github.com/ClickHouse/ClickHouse/pull/63181) ([alesapin](https://github.com/alesapin)).
* Fix intersecting parts when restarting after a drop range. [#63202](https://github.com/ClickHouse/ClickHouse/pull/63202) ([Han Fei](https://github.com/hanfei1991)).
* Fix a misbehavior when SQL security defaults don't load for old tables during server startup. [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)).
* Fix JOIN filter push-down for filled JOIN. Closes [#63228](https://github.com/ClickHouse/ClickHouse/issues/63228). [#63234](https://github.com/ClickHouse/ClickHouse/pull/63234) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix infinite loop while listing objects in Azure blob storage. [#63257](https://github.com/ClickHouse/ClickHouse/pull/63257) ([Julia Kartseva](https://github.com/jkartseva)).
* CROSS JOIN can be executed with any value of the `join_algorithm` setting, close [#62431](https://github.com/ClickHouse/ClickHouse/issues/62431). [#63273](https://github.com/ClickHouse/ClickHouse/pull/63273) ([vdimir](https://github.com/vdimir)).
* Fixed a potential crash caused by a `no space left` error when temporary data in the cache is used. [#63346](https://github.com/ClickHouse/ClickHouse/pull/63346) ([vdimir](https://github.com/vdimir)).
* Fix bug which could potentially lead to rare LOGICAL_ERROR during SELECT query with message: `Unexpected return type from materialize. Expected type_XXX. Got type_YYY.` Introduced in [#59379](https://github.com/ClickHouse/ClickHouse/issues/59379). [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)).
* Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)).
* Fix debug assert when using grouping WITH ROLLUP and LowCardinality types. [#63398](https://github.com/ClickHouse/ClickHouse/pull/63398) ([Raúl Marín](https://github.com/Algunenano)).
* Fix logical errors in queries with `GROUPING SETS` and `WHERE` and `group_by_use_nulls = true`, close [#60538](https://github.com/ClickHouse/ClickHouse/issues/60538). [#63405](https://github.com/ClickHouse/ClickHouse/pull/63405) ([vdimir](https://github.com/vdimir)).
* Fix backup of projection part in case projection was removed from table metadata, but part still has projection. [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix 'Every derived table must have its own alias' error for MYSQL dictionary source, close [#63341](https://github.com/ClickHouse/ClickHouse/issues/63341). [#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)).
* Insert QueryFinish on AsyncInsertFlush with no data. [#63483](https://github.com/ClickHouse/ClickHouse/pull/63483) ([Raúl Marín](https://github.com/Algunenano)).
* Fix `system.query_log.used_dictionaries` logging. [#63487](https://github.com/ClickHouse/ClickHouse/pull/63487) ([Eduard Karacharov](https://github.com/korowa)).
* Avoid segfault in `MergeTreePrefetchedReadPool` while fetching projection parts. [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix rabbitmq heap-use-after-free found by clang-18, which can happen if an error is thrown from RabbitMQ during initialization of exchange and queues. [#63515](https://github.com/ClickHouse/ClickHouse/pull/63515) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix crash on exit with sentry enabled (due to openssl destroyed before sentry). [#63548](https://github.com/ClickHouse/ClickHouse/pull/63548) ([Azat Khuzhin](https://github.com/azat)).
* Fix support for Array and Map with Keyed hashing functions and materialized keys. [#63628](https://github.com/ClickHouse/ClickHouse/pull/63628) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
* Fixed Parquet filter pushdown not working with Analyzer. [#63642](https://github.com/ClickHouse/ClickHouse/pull/63642) ([Michael Kolupaev](https://github.com/al13n321)).
* It is forbidden to convert MergeTree to replicated if the zookeeper path for this table already exists. [#63670](https://github.com/ClickHouse/ClickHouse/pull/63670) ([Kirill](https://github.com/kirillgarbar)).
* Read only the necessary columns from VIEW (new analyzer). Closes [#62594](https://github.com/ClickHouse/ClickHouse/issues/62594). [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix rare case with missing data in the result of distributed query. [#63691](https://github.com/ClickHouse/ClickHouse/pull/63691) ([vdimir](https://github.com/vdimir)).
* Fix [#63539](https://github.com/ClickHouse/ClickHouse/issues/63539). Forbid WINDOW redefinition in new analyzer. [#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)).
* Fix `flatten_nested` being broken with the Replicated database engine. [#63695](https://github.com/ClickHouse/ClickHouse/pull/63695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix `SIZES_OF_COLUMNS_DOESNT_MATCH` error for queries with `arrayJoin` function in `WHERE`. Fixes [#63653](https://github.com/ClickHouse/ClickHouse/issues/63653). [#63722](https://github.com/ClickHouse/ClickHouse/pull/63722) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix `Not found column` and `CAST AS Map from array requires nested tuple of 2 elements` exceptions for distributed queries which use `Map(Nothing, Nothing)` type. Fixes [#63637](https://github.com/ClickHouse/ClickHouse/issues/63637). [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix possible `ILLEGAL_COLUMN` error in `partial_merge` join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)).
* Fix `query_plan_remove_redundant_distinct` breaking queries with window functions (when `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix possible crash with SYSTEM UNLOAD PRIMARY KEY. [#63778](https://github.com/ClickHouse/ClickHouse/pull/63778) ([Raúl Marín](https://github.com/Algunenano)).
* Fix a query with a duplicating cycling alias. Fixes [#63320](https://github.com/ClickHouse/ClickHouse/issues/63320). [#63791](https://github.com/ClickHouse/ClickHouse/pull/63791) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fixed performance degradation of parsing data formats in INSERT query. This closes [#62918](https://github.com/ClickHouse/ClickHouse/issues/62918). This partially reverts [#42284](https://github.com/ClickHouse/ClickHouse/issues/42284), which breaks the original design and introduces more problems. [#63801](https://github.com/ClickHouse/ClickHouse/pull/63801) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add 'endpoint_subpath' S3 URI setting to allow plain_rewritable disks to share the same endpoint. [#63806](https://github.com/ClickHouse/ClickHouse/pull/63806) ([Julia Kartseva](https://github.com/jkartseva)).
* Fix queries using parallel read buffer (e.g. with `max_download_threads` > 0) getting stuck when threads cannot be allocated. [#63814](https://github.com/ClickHouse/ClickHouse/pull/63814) ([Antonio Andelic](https://github.com/antonio2368)).
* Allow JOIN filter push down to both streams if only a single equivalent column is used in the query. Closes [#63799](https://github.com/ClickHouse/ClickHouse/issues/63799). [#63819](https://github.com/ClickHouse/ClickHouse/pull/63819) ([Maksim Kita](https://github.com/kitaisreal)).
* Remove the data from all disks after DROP with the Lazy database engine. Without this change, orphaned data would remain on the disks. [#63848](https://github.com/ClickHouse/ClickHouse/pull/63848) ([MikhailBurdukov](https://github.com/MikhailBurdukov)).
* Fix incorrect select query result when parallel replicas were used to read from a Materialized View. [#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)).
* Fixes in `find_super_nodes` and `find_big_family` command of keeper-client: - do not fail on ZNONODE errors - find super nodes inside super nodes - properly calculate subtree node count. [#63862](https://github.com/ClickHouse/ClickHouse/pull/63862) ([Alexander Gololobov](https://github.com/davenger)).
* Fix an error `Database name is empty` for remote queries with lambdas over the cluster with modified default database. Fixes [#63471](https://github.com/ClickHouse/ClickHouse/issues/63471). [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix SIGSEGV due to CPU/Real (`query_profiler_real_time_period_ns`/`query_profiler_cpu_time_period_ns`) profiler (an issue since 2022 that led to periodic server crashes, especially when the Distributed engine was used). [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)).
* Fixed `EXPLAIN CURRENT TRANSACTION` query. [#63926](https://github.com/ClickHouse/ClickHouse/pull/63926) ([Anton Popov](https://github.com/CurtizJ)).
* Fix analyzer: IN function with arbitrarily deep sub-selects in a materialized view now uses the insertion block. [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Allow `ALTER TABLE .. MODIFY|RESET SETTING` and `ALTER TABLE .. MODIFY COMMENT` for plain_rewritable disk. [#63933](https://github.com/ClickHouse/ClickHouse/pull/63933) ([Julia Kartseva](https://github.com/jkartseva)).
* Fix Recursive CTE with distributed queries. Closes [#63790](https://github.com/ClickHouse/ClickHouse/issues/63790). [#63939](https://github.com/ClickHouse/ClickHouse/pull/63939) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix resolve of unqualified COLUMNS matcher. Preserve the input columns order and forbid usage of unknown identifiers. [#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)).
* Fix the `Not found column` error for queries with `skip_unused_shards = 1`, `LIMIT BY`, and the new analyzer. Fixes [#63943](https://github.com/ClickHouse/ClickHouse/issues/63943). [#63983](https://github.com/ClickHouse/ClickHouse/pull/63983) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* (Low-quality third-party Kusto Query Language). Resolve a client abort issue when using the KQL table function in interactive mode. [#63992](https://github.com/ClickHouse/ClickHouse/pull/63992) ([Yong Wang](https://github.com/kashwy)).
* Backported in [#64356](https://github.com/ClickHouse/ClickHouse/issues/64356): Fix a `Cyclic aliases` error for cyclic aliases of different type (expression and function). Fixes [#63205](https://github.com/ClickHouse/ClickHouse/issues/63205). [#63993](https://github.com/ClickHouse/ClickHouse/pull/63993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)).
* Do not throw `Storage doesn't support FINAL` error for remote queries over non-MergeTree tables with `final = true` and new analyzer. Fixes [#63960](https://github.com/ClickHouse/ClickHouse/issues/63960). [#64037](https://github.com/ClickHouse/ClickHouse/pull/64037) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Add missing settings to recoverLostReplica. [#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)).
* Fix unwind on SIGSEGV on aarch64 (due to small stack for signal). [#64058](https://github.com/ClickHouse/ClickHouse/pull/64058) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#64324](https://github.com/ClickHouse/ClickHouse/issues/64324): This fix uses a properly redefined context with the correct definer for each individual view in the query pipeline. Closes [#63777](https://github.com/ClickHouse/ClickHouse/issues/63777). [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)).
* Backported in [#64384](https://github.com/ClickHouse/ClickHouse/issues/64384): Fix analyzer: "Not found column" error is fixed when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Fix azure backup writing multipart blocks as 1mb (read buffer size) instead of max_upload_part_size. [#64117](https://github.com/ClickHouse/ClickHouse/pull/64117) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Backported in [#64541](https://github.com/ClickHouse/ClickHouse/issues/64541): Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)).
* Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#64332](https://github.com/ClickHouse/ClickHouse/issues/64332): The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)).
* Ignore `text_log` config when using Keeper. [#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#64692](https://github.com/ClickHouse/ClickHouse/issues/64692): Fix Query Tree size validation. Closes [#63701](https://github.com/ClickHouse/ClickHouse/issues/63701). [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)).
* Backported in [#64411](https://github.com/ClickHouse/ClickHouse/issues/64411): Fix `Logical error: Bad cast` for `Buffer` table with `PREWHERE`. Fixes [#64172](https://github.com/ClickHouse/ClickHouse/issues/64172). [#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#64625](https://github.com/ClickHouse/ClickHouse/issues/64625): Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. [#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Backported in [#64682](https://github.com/ClickHouse/ClickHouse/issues/64682): Fix [#64612](https://github.com/ClickHouse/ClickHouse/issues/64612). Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)).
#### CI Fix or Improvement (changelog entry is not required)
* Implement cumulative A Sync status. [#61464](https://github.com/ClickHouse/ClickHouse/pull/61464) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add ability to run Azure tests in PR with label. [#63196](https://github.com/ClickHouse/ClickHouse/pull/63196) ([alesapin](https://github.com/alesapin)).
* Add azure run with msan. [#63238](https://github.com/ClickHouse/ClickHouse/pull/63238) ([alesapin](https://github.com/alesapin)).
* Improve cloud backport script. [#63282](https://github.com/ClickHouse/ClickHouse/pull/63282) ([Raúl Marín](https://github.com/Algunenano)).
* Use `/commit/` to have the URLs in [reports](https://play.clickhouse.com/play?user=play#c2VsZWN0IGRpc3RpbmN0IGNvbW1pdF91cmwgZnJvbSBjaGVja3Mgd2hlcmUgY2hlY2tfc3RhcnRfdGltZSA+PSBub3coKSAtIGludGVydmFsIDEgbW9udGggYW5kIHB1bGxfcmVxdWVzdF9udW1iZXI9NjA1MzI=) like https://github.com/ClickHouse/ClickHouse/commit/44f8bc5308b53797bec8cccc3bd29fab8a00235d and not like https://github.com/ClickHouse/ClickHouse/commits/44f8bc5308b53797bec8cccc3bd29fab8a00235d. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Extra constraints for stress and fuzzer tests. [#63470](https://github.com/ClickHouse/ClickHouse/pull/63470) ([Raúl Marín](https://github.com/Algunenano)).
* Fix 02362_part_log_merge_algorithm flaky test. [#63635](https://github.com/ClickHouse/ClickHouse/pull/63635) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)).
* Fix test_odbc_interaction from aarch64 [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63787](https://github.com/ClickHouse/ClickHouse/pull/63787) ([alesapin](https://github.com/alesapin)).
* Fix test `test_catboost_evaluate` for aarch64. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63789](https://github.com/ClickHouse/ClickHouse/pull/63789) ([alesapin](https://github.com/alesapin)).
* Remove HDFS from disks config for one integration test for arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63832](https://github.com/ClickHouse/ClickHouse/pull/63832) ([alesapin](https://github.com/alesapin)).
* Bump version for old image in test_short_strings_aggregation to make it work on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63836](https://github.com/ClickHouse/ClickHouse/pull/63836) ([alesapin](https://github.com/alesapin)).
* Disable test `test_non_default_compression/test.py::test_preconfigured_deflateqpl_codec` on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63839](https://github.com/ClickHouse/ClickHouse/pull/63839) ([alesapin](https://github.com/alesapin)).
* Include checks like `Stateless tests (asan, distributed cache, meta storage in keeper, s3 storage) [2/3]` in `Mergeable Check` and `A Sync`. [#63945](https://github.com/ClickHouse/ClickHouse/pull/63945) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)).
* Add `ClickHouseVersion.copy` method. Create a branch release in advance without spinning out the release to increase the stability. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* The mime type is not 100% reliable for Python and shell scripts without shebangs; add a check for file extension. [#64062](https://github.com/ClickHouse/ClickHouse/pull/64062) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add retries in git submodule update. [#64125](https://github.com/ClickHouse/ClickHouse/pull/64125) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC)
* Backported in [#64591](https://github.com/ClickHouse/ClickHouse/issues/64591): Disabled `enable_vertical_final` setting by default. This feature should not be used because it has a bug: [#64543](https://github.com/ClickHouse/ClickHouse/issues/64543). [#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Do not remove server constants from GROUP BY key for secondary query."'. [#63297](https://github.com/ClickHouse/ClickHouse/pull/63297) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Introduce bulk loading to StorageEmbeddedRocksDB"'. [#63316](https://github.com/ClickHouse/ClickHouse/pull/63316) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Add tags for the test 03000_traverse_shadow_system_data_paths.sql to make it stable'. [#63366](https://github.com/ClickHouse/ClickHouse/pull/63366) ([Aleksei Filatov](https://github.com/aalexfvk)).
* NO CL ENTRY: 'Revert "Revert "Do not remove server constants from GROUP BY key for secondary query.""'. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* NO CL ENTRY: 'Revert "Fix index analysis for `DateTime64`"'. [#63525](https://github.com/ClickHouse/ClickHouse/pull/63525) ([Raúl Marín](https://github.com/Algunenano)).
* NO CL ENTRY: 'Add `jwcrypto` to integration tests runner'. [#63551](https://github.com/ClickHouse/ClickHouse/pull/63551) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* NO CL ENTRY: 'Follow-up for the `binary_symbols` table in CI'. [#63802](https://github.com/ClickHouse/ClickHouse/pull/63802) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'chore(ci-workers): remove reusable from tailscale key'. [#63999](https://github.com/ClickHouse/ClickHouse/pull/63999) ([Gabriel Martinez](https://github.com/GMartinez-Sisti)).
* NO CL ENTRY: 'Revert "Update gui.md - Add ch-ui to open-source available tools."'. [#64064](https://github.com/ClickHouse/ClickHouse/pull/64064) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Prevent stack overflow in Fuzzer and Stress test'. [#64082](https://github.com/ClickHouse/ClickHouse/pull/64082) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Revert "Prevent conversion to Replicated if zookeeper path already exists"'. [#64214](https://github.com/ClickHouse/ClickHouse/pull/64214) ([Sergei Trifonov](https://github.com/serxa)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Remove http_max_chunk_size setting (too internal) [#60852](https://github.com/ClickHouse/ClickHouse/pull/60852) ([Azat Khuzhin](https://github.com/azat)).
* Fix race in refreshable materialized views causing SELECT to fail sometimes [#60883](https://github.com/ClickHouse/ClickHouse/pull/60883) ([Michael Kolupaev](https://github.com/al13n321)).
* Parallel replicas: table check failover [#61935](https://github.com/ClickHouse/ClickHouse/pull/61935) ([Igor Nikonov](https://github.com/devcrafter)).
* Avoid crashing on column type mismatch in a few dozen places [#62087](https://github.com/ClickHouse/ClickHouse/pull/62087) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix optimize_if_chain_to_multiif const NULL handling [#62104](https://github.com/ClickHouse/ClickHouse/pull/62104) ([Michael Kolupaev](https://github.com/al13n321)).
* Use intrusive lists for `ResourceRequest` instead of deque [#62165](https://github.com/ClickHouse/ClickHouse/pull/62165) ([Sergei Trifonov](https://github.com/serxa)).
* Analyzer: Fix validateAggregates for tables with different aliases [#62346](https://github.com/ClickHouse/ClickHouse/pull/62346) ([vdimir](https://github.com/vdimir)).
* Improve code and tests of `DROP` of multiple tables [#62359](https://github.com/ClickHouse/ClickHouse/pull/62359) ([zhongyuankai](https://github.com/zhongyuankai)).
* Fix exception message during writing to partitioned s3/hdfs/azure path with globs [#62423](https://github.com/ClickHouse/ClickHouse/pull/62423) ([Kruglov Pavel](https://github.com/Avogar)).
* Support UBSan on Clang-19 (master) [#62466](https://github.com/ClickHouse/ClickHouse/pull/62466) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Save the stacktrace of thread waiting on failing AsyncLoader job [#62719](https://github.com/ClickHouse/ClickHouse/pull/62719) ([Sergei Trifonov](https://github.com/serxa)).
* group_by_use_nulls strikes back [#62922](https://github.com/ClickHouse/ClickHouse/pull/62922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Analyzer: prefer column name to alias from array join [#62995](https://github.com/ClickHouse/ClickHouse/pull/62995) ([vdimir](https://github.com/vdimir)).
* CI: try separate the workflows file for GitHub's Merge Queue [#63123](https://github.com/ClickHouse/ClickHouse/pull/63123) ([Max K.](https://github.com/maxknv)).
* Try to fix coverage tests [#63130](https://github.com/ClickHouse/ClickHouse/pull/63130) ([Raúl Marín](https://github.com/Algunenano)).
* Fix azure backup flaky test [#63158](https://github.com/ClickHouse/ClickHouse/pull/63158) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Merging [#60920](https://github.com/ClickHouse/ClickHouse/issues/60920) [#63159](https://github.com/ClickHouse/ClickHouse/pull/63159) ([vdimir](https://github.com/vdimir)).
* QueryAnalysisPass improve QUALIFY validation [#63162](https://github.com/ClickHouse/ClickHouse/pull/63162) ([Maksim Kita](https://github.com/kitaisreal)).
* Add numpy tests for different endianness [#63189](https://github.com/ClickHouse/ClickHouse/pull/63189) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fallback action-runner to autoupdate when it's unable to start [#63195](https://github.com/ClickHouse/ClickHouse/pull/63195) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix possible endless loop while reading from azure [#63197](https://github.com/ClickHouse/ClickHouse/pull/63197) ([Anton Popov](https://github.com/CurtizJ)).
* Add information about materialized view security bug fix into the changelog [#63204](https://github.com/ClickHouse/ClickHouse/pull/63204) ([pufit](https://github.com/pufit)).
* Disable one query from 02994_sanity_check_settings [#63208](https://github.com/ClickHouse/ClickHouse/pull/63208) ([Raúl Marín](https://github.com/Algunenano)).
* Enable custom parquet encoder by default, attempt 2 [#63210](https://github.com/ClickHouse/ClickHouse/pull/63210) ([Michael Kolupaev](https://github.com/al13n321)).
* Update version after release [#63215](https://github.com/ClickHouse/ClickHouse/pull/63215) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update version_date.tsv and changelogs after v24.4.1.2088-stable [#63217](https://github.com/ClickHouse/ClickHouse/pull/63217) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v24.3.3.102-lts [#63226](https://github.com/ClickHouse/ClickHouse/pull/63226) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v24.2.3.70-stable [#63227](https://github.com/ClickHouse/ClickHouse/pull/63227) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Return back [#61551](https://github.com/ClickHouse/ClickHouse/issues/61551) (More optimal loading of marks) [#63233](https://github.com/ClickHouse/ClickHouse/pull/63233) ([Anton Popov](https://github.com/CurtizJ)).
* Hide CI options under a spoiler [#63237](https://github.com/ClickHouse/ClickHouse/pull/63237) ([Konstantin Bogdanov](https://github.com/thevar1able)).
* Add `FROM` keyword to `TRUNCATE ALL TABLES` [#63241](https://github.com/ClickHouse/ClickHouse/pull/63241) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Minor follow-up to a renaming PR [#63260](https://github.com/ClickHouse/ClickHouse/pull/63260) ([Robert Schulze](https://github.com/rschu1ze)).
* More checks for concurrently deleted files and dirs in system.remote_data_paths [#63274](https://github.com/ClickHouse/ClickHouse/pull/63274) ([Alexander Gololobov](https://github.com/davenger)).
* Fix SettingsChangesHistory.h for allow_experimental_join_condition [#63278](https://github.com/ClickHouse/ClickHouse/pull/63278) ([Raúl Marín](https://github.com/Algunenano)).
* Update version_date.tsv and changelogs after v23.8.14.6-lts [#63285](https://github.com/ClickHouse/ClickHouse/pull/63285) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix azure flaky test [#63286](https://github.com/ClickHouse/ClickHouse/pull/63286) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Fix deadlock in `CacheDictionaryUpdateQueue` in case of exception in constructor [#63287](https://github.com/ClickHouse/ClickHouse/pull/63287) ([Nikita Taranov](https://github.com/nickitat)).
* DiskApp: fix 'list --recursive /' and crash on invalid arguments [#63296](https://github.com/ClickHouse/ClickHouse/pull/63296) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix terminate because of unhandled exception in `MergeTreeDeduplicationLog::shutdown` [#63298](https://github.com/ClickHouse/ClickHouse/pull/63298) ([Nikita Taranov](https://github.com/nickitat)).
* Move s3_plain_rewritable unit test to shell [#63317](https://github.com/ClickHouse/ClickHouse/pull/63317) ([Julia Kartseva](https://github.com/jkartseva)).
* Add tests for [#63264](https://github.com/ClickHouse/ClickHouse/issues/63264) [#63321](https://github.com/ClickHouse/ClickHouse/pull/63321) ([Raúl Marín](https://github.com/Algunenano)).
* Try fix segfault in `MergeTreeReadPoolBase::createTask` [#63323](https://github.com/ClickHouse/ClickHouse/pull/63323) ([Antonio Andelic](https://github.com/antonio2368)).
* Update README.md [#63326](https://github.com/ClickHouse/ClickHouse/pull/63326) ([Tyler Hannan](https://github.com/tylerhannan)).
* Skip unaccessible table dirs in system.remote_data_paths [#63330](https://github.com/ClickHouse/ClickHouse/pull/63330) ([Alexander Gololobov](https://github.com/davenger)).
* Add test for [#56287](https://github.com/ClickHouse/ClickHouse/issues/56287) [#63340](https://github.com/ClickHouse/ClickHouse/pull/63340) ([Raúl Marín](https://github.com/Algunenano)).
* Update README.md [#63350](https://github.com/ClickHouse/ClickHouse/pull/63350) ([Tyler Hannan](https://github.com/tylerhannan)).
* Add test for [#48049](https://github.com/ClickHouse/ClickHouse/issues/48049) [#63351](https://github.com/ClickHouse/ClickHouse/pull/63351) ([Raúl Marín](https://github.com/Algunenano)).
* Add option `query_id_prefix` to `clickhouse-benchmark` [#63352](https://github.com/ClickHouse/ClickHouse/pull/63352) ([Anton Popov](https://github.com/CurtizJ)).
* Rollback azurite to working version [#63354](https://github.com/ClickHouse/ClickHouse/pull/63354) ([alesapin](https://github.com/alesapin)).
* Randomize setting `enable_block_offset_column` in stress tests [#63355](https://github.com/ClickHouse/ClickHouse/pull/63355) ([Anton Popov](https://github.com/CurtizJ)).
* Fix AST parsing of invalid type names [#63357](https://github.com/ClickHouse/ClickHouse/pull/63357) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix some 00002_log_and_exception_messages_formatting flakiness [#63358](https://github.com/ClickHouse/ClickHouse/pull/63358) ([Michael Kolupaev](https://github.com/al13n321)).
* Add a test for [#55655](https://github.com/ClickHouse/ClickHouse/issues/55655) [#63380](https://github.com/ClickHouse/ClickHouse/pull/63380) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix data race in `reportBrokenPart` [#63396](https://github.com/ClickHouse/ClickHouse/pull/63396) ([Antonio Andelic](https://github.com/antonio2368)).
* Workaround for `oklch()` inside canvas bug for firefox [#63404](https://github.com/ClickHouse/ClickHouse/pull/63404) ([Sergei Trifonov](https://github.com/serxa)).
* Add test for issue [#47862](https://github.com/ClickHouse/ClickHouse/issues/47862) [#63424](https://github.com/ClickHouse/ClickHouse/pull/63424) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix parsing of `CREATE INDEX` query [#63425](https://github.com/ClickHouse/ClickHouse/pull/63425) ([Anton Popov](https://github.com/CurtizJ)).
* We are using Shared Catalog in the CI Logs cluster [#63442](https://github.com/ClickHouse/ClickHouse/pull/63442) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix collection of coverage data in the CI Logs cluster [#63453](https://github.com/ClickHouse/ClickHouse/pull/63453) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix flaky test for rocksdb bulk sink [#63457](https://github.com/ClickHouse/ClickHouse/pull/63457) ([Duc Canh Le](https://github.com/canhld94)).
* io_uring: refactor get reader from context [#63475](https://github.com/ClickHouse/ClickHouse/pull/63475) ([Tomer Shafir](https://github.com/tomershafir)).
* Analyzer setting max_streams_to_max_threads_ratio overflow fix [#63478](https://github.com/ClickHouse/ClickHouse/pull/63478) ([Maksim Kita](https://github.com/kitaisreal)).
* Add setting for better rendering of multiline string for pretty format [#63479](https://github.com/ClickHouse/ClickHouse/pull/63479) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Fix logical error when reloading config with customly created web disk broken after [#56367](https://github.com/ClickHouse/ClickHouse/issues/56367) [#63484](https://github.com/ClickHouse/ClickHouse/pull/63484) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Add test for [#49307](https://github.com/ClickHouse/ClickHouse/issues/49307) [#63486](https://github.com/ClickHouse/ClickHouse/pull/63486) ([Anton Popov](https://github.com/CurtizJ)).
* Remove leftovers of GCC support in cmake rules [#63488](https://github.com/ClickHouse/ClickHouse/pull/63488) ([Azat Khuzhin](https://github.com/azat)).
* Fix ProfileEventTimeIncrement code [#63489](https://github.com/ClickHouse/ClickHouse/pull/63489) ([Azat Khuzhin](https://github.com/azat)).
* MergeTreePrefetchedReadPool: Print parent name when logging projection parts [#63522](https://github.com/ClickHouse/ClickHouse/pull/63522) ([Raúl Marín](https://github.com/Algunenano)).
* Correctly stop `asyncCopy` tasks in all cases [#63523](https://github.com/ClickHouse/ClickHouse/pull/63523) ([Antonio Andelic](https://github.com/antonio2368)).
* Almost everything should work on AArch64 (Part of [#58061](https://github.com/ClickHouse/ClickHouse/issues/58061)) [#63527](https://github.com/ClickHouse/ClickHouse/pull/63527) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update randomization of `old_parts_lifetime` [#63530](https://github.com/ClickHouse/ClickHouse/pull/63530) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Update 02240_system_filesystem_cache_table.sh [#63531](https://github.com/ClickHouse/ClickHouse/pull/63531) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix data race in `DistributedSink` [#63538](https://github.com/ClickHouse/ClickHouse/pull/63538) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix azure tests run on master [#63540](https://github.com/ClickHouse/ClickHouse/pull/63540) ([alesapin](https://github.com/alesapin)).
* Find a proper commit for cumulative `A Sync` status [#63543](https://github.com/ClickHouse/ClickHouse/pull/63543) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add `no-s3-storage` tag to local_plain_rewritable ut [#63546](https://github.com/ClickHouse/ClickHouse/pull/63546) ([Julia Kartseva](https://github.com/jkartseva)).
* Go back to upstream lz4 submodule [#63574](https://github.com/ClickHouse/ClickHouse/pull/63574) ([Raúl Marín](https://github.com/Algunenano)).
* Fix logical error in ColumnTuple::tryInsert() [#63583](https://github.com/ClickHouse/ClickHouse/pull/63583) ([Michael Kolupaev](https://github.com/al13n321)).
* harmonize sumMap error messages on ILLEGAL_TYPE_OF_ARGUMENT [#63619](https://github.com/ClickHouse/ClickHouse/pull/63619) ([Yohann Jardin](https://github.com/yohannj)).
* Update README.md [#63631](https://github.com/ClickHouse/ClickHouse/pull/63631) ([Tyler Hannan](https://github.com/tylerhannan)).
* Ignore global profiler if system.trace_log is not enabled and really disable it for the keeper standalone build [#63632](https://github.com/ClickHouse/ClickHouse/pull/63632) ([Azat Khuzhin](https://github.com/azat)).
* Fixes for 00002_log_and_exception_messages_formatting [#63634](https://github.com/ClickHouse/ClickHouse/pull/63634) ([Azat Khuzhin](https://github.com/azat)).
* Fix tests flakiness due to long SYSTEM FLUSH LOGS (explicitly specify old_parts_lifetime) [#63639](https://github.com/ClickHouse/ClickHouse/pull/63639) ([Azat Khuzhin](https://github.com/azat)).
* Update clickhouse-test help section [#63663](https://github.com/ClickHouse/ClickHouse/pull/63663) ([Ali](https://github.com/xogoodnow)).
* Fix bad test `02950_part_log_bytes_uncompressed` [#63672](https://github.com/ClickHouse/ClickHouse/pull/63672) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove leftovers of `optimize_monotonous_functions_in_order_by` [#63674](https://github.com/ClickHouse/ClickHouse/pull/63674) ([Nikita Taranov](https://github.com/nickitat)).
* tests: attempt to fix 02340_parts_refcnt_mergetree flakiness [#63684](https://github.com/ClickHouse/ClickHouse/pull/63684) ([Azat Khuzhin](https://github.com/azat)).
* Parallel replicas: simple cleanup [#63685](https://github.com/ClickHouse/ClickHouse/pull/63685) ([Igor Nikonov](https://github.com/devcrafter)).
* Cancel S3 reads properly when parallel reads are used [#63687](https://github.com/ClickHouse/ClickHouse/pull/63687) ([Antonio Andelic](https://github.com/antonio2368)).
* Explain map insertion order [#63690](https://github.com/ClickHouse/ClickHouse/pull/63690) ([Mark Needham](https://github.com/mneedham)).
* selectRangesToRead() simple cleanup [#63692](https://github.com/ClickHouse/ClickHouse/pull/63692) ([Igor Nikonov](https://github.com/devcrafter)).
* Fix fuzzed analyzer_join_with_constant query [#63702](https://github.com/ClickHouse/ClickHouse/pull/63702) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Add missing explicit instantiations of ColumnUnique [#63718](https://github.com/ClickHouse/ClickHouse/pull/63718) ([Raúl Marín](https://github.com/Algunenano)).
* Better asserts in ColumnString.h [#63719](https://github.com/ClickHouse/ClickHouse/pull/63719) ([Raúl Marín](https://github.com/Algunenano)).
* Don't randomize some settings in 02941_variant_type_* tests to avoid timeouts [#63721](https://github.com/ClickHouse/ClickHouse/pull/63721) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix flaky 03145_non_loaded_projection_backup.sh [#63728](https://github.com/ClickHouse/ClickHouse/pull/63728) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Userspace page cache: don't collect stats if cache is unused [#63730](https://github.com/ClickHouse/ClickHouse/pull/63730) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix insignificant UBSAN error in QueryAnalyzer::replaceNodesWithPositionalArguments() [#63734](https://github.com/ClickHouse/ClickHouse/pull/63734) ([Michael Kolupaev](https://github.com/al13n321)).
* Fix a bug in resolving matcher inside lambda inside ARRAY JOIN [#63744](https://github.com/ClickHouse/ClickHouse/pull/63744) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Remove unused CaresPTRResolver::cancel_requests method [#63754](https://github.com/ClickHouse/ClickHouse/pull/63754) ([Arthur Passos](https://github.com/arthurpassos)).
* Do not hide disk name [#63756](https://github.com/ClickHouse/ClickHouse/pull/63756) ([Kseniia Sumarokova](https://github.com/kssenii)).
* CI: remove Cancel and Debug workflows as redundant [#63757](https://github.com/ClickHouse/ClickHouse/pull/63757) ([Max K.](https://github.com/maxknv)).
* Security Policy: Add notification process [#63773](https://github.com/ClickHouse/ClickHouse/pull/63773) ([Leticia Webb](https://github.com/leticiawebb)).
* Fix typo [#63774](https://github.com/ClickHouse/ClickHouse/pull/63774) ([Anton Popov](https://github.com/CurtizJ)).
* Fix fuzzer when only explicit faults are used [#63775](https://github.com/ClickHouse/ClickHouse/pull/63775) ([Raúl Marín](https://github.com/Algunenano)).
* Settings typo [#63782](https://github.com/ClickHouse/ClickHouse/pull/63782) ([Rory Crispin](https://github.com/RoryCrispin)).
* Changed the previous value of `output_format_pretty_preserve_border_for_multiline_string` setting [#63783](https://github.com/ClickHouse/ClickHouse/pull/63783) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* fix antlr insertStmt for issue 63657 [#63811](https://github.com/ClickHouse/ClickHouse/pull/63811) ([GG Bond](https://github.com/zzyReal666)).
* Fix race in `ReplicatedMergeTreeLogEntryData` [#63816](https://github.com/ClickHouse/ClickHouse/pull/63816) ([Antonio Andelic](https://github.com/antonio2368)).
* Allow allocation during job destructor in `ThreadPool` [#63829](https://github.com/ClickHouse/ClickHouse/pull/63829) ([Antonio Andelic](https://github.com/antonio2368)).
* io_uring: add basic io_uring clickhouse perf test [#63835](https://github.com/ClickHouse/ClickHouse/pull/63835) ([Tomer Shafir](https://github.com/tomershafir)).
* fix typo [#63838](https://github.com/ClickHouse/ClickHouse/pull/63838) ([Alexander Gololobov](https://github.com/davenger)).
* Remove unnecessary logging statements in MergeJoinTransform.cpp [#63860](https://github.com/ClickHouse/ClickHouse/pull/63860) ([vdimir](https://github.com/vdimir)).
* CI: disable ARM integration test cases with libunwind crash [#63867](https://github.com/ClickHouse/ClickHouse/pull/63867) ([Max K.](https://github.com/maxknv)).
* Fix some settings values in 02455_one_row_from_csv_memory_usage test to make it less flaky [#63874](https://github.com/ClickHouse/ClickHouse/pull/63874) ([Kruglov Pavel](https://github.com/Avogar)).
* Randomise `allow_experimental_parallel_reading_from_replicas` in stress tests [#63899](https://github.com/ClickHouse/ClickHouse/pull/63899) ([Nikita Taranov](https://github.com/nickitat)).
* Fix logs test for binary data by converting it to a valid UTF8 string. [#63909](https://github.com/ClickHouse/ClickHouse/pull/63909) ([Alexey Katsman](https://github.com/alexkats)).
* More sanity checks for parallel replicas [#63910](https://github.com/ClickHouse/ClickHouse/pull/63910) ([Nikita Taranov](https://github.com/nickitat)).
* Insignificant libunwind build fixes [#63946](https://github.com/ClickHouse/ClickHouse/pull/63946) ([Azat Khuzhin](https://github.com/azat)).
* Revert multiline pretty changes due to performance problems [#63947](https://github.com/ClickHouse/ClickHouse/pull/63947) ([Raúl Marín](https://github.com/Algunenano)).
* Some usability improvements for c++expr script [#63948](https://github.com/ClickHouse/ClickHouse/pull/63948) ([Azat Khuzhin](https://github.com/azat)).
* CI: aarch64: disable arm integration tests with kerberaized kafka [#63961](https://github.com/ClickHouse/ClickHouse/pull/63961) ([Max K.](https://github.com/maxknv)).
* Slightly better setting `force_optimize_projection_name` [#63997](https://github.com/ClickHouse/ClickHouse/pull/63997) ([Anton Popov](https://github.com/CurtizJ)).
* Better script to collect symbols statistics [#64013](https://github.com/ClickHouse/ClickHouse/pull/64013) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix a typo in Analyzer [#64022](https://github.com/ClickHouse/ClickHouse/pull/64022) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix libbcrypt for FreeBSD build [#64023](https://github.com/ClickHouse/ClickHouse/pull/64023) ([Azat Khuzhin](https://github.com/azat)).
* Fix searching for libclang_rt.builtins.*.a on FreeBSD [#64051](https://github.com/ClickHouse/ClickHouse/pull/64051) ([Azat Khuzhin](https://github.com/azat)).
* Fix waiting for mutations with retriable errors [#64063](https://github.com/ClickHouse/ClickHouse/pull/64063) ([Alexander Tokmakov](https://github.com/tavplubix)).
* harmonize h3PointDist* error messages [#64080](https://github.com/ClickHouse/ClickHouse/pull/64080) ([Yohann Jardin](https://github.com/yohannj)).
* This log message is better in Trace [#64081](https://github.com/ClickHouse/ClickHouse/pull/64081) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* tests: fix expected error for 03036_reading_s3_archives (fixes CI) [#64089](https://github.com/ClickHouse/ClickHouse/pull/64089) ([Azat Khuzhin](https://github.com/azat)).
* Fix sanitizers [#64090](https://github.com/ClickHouse/ClickHouse/pull/64090) ([Azat Khuzhin](https://github.com/azat)).
* Update llvm/clang to 18.1.6 [#64091](https://github.com/ClickHouse/ClickHouse/pull/64091) ([Azat Khuzhin](https://github.com/azat)).
* CI: mergeable check redesign [#64093](https://github.com/ClickHouse/ClickHouse/pull/64093) ([Max K.](https://github.com/maxknv)).
* Move `isAllASCII` from UTFHelper to StringUtils [#64108](https://github.com/ClickHouse/ClickHouse/pull/64108) ([Robert Schulze](https://github.com/rschu1ze)).
* Clean up .clang-tidy after transition to Clang 18 [#64111](https://github.com/ClickHouse/ClickHouse/pull/64111) ([Robert Schulze](https://github.com/rschu1ze)).
* Ignore exception when checking for cgroupsv2 [#64118](https://github.com/ClickHouse/ClickHouse/pull/64118) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix UBSan error in negative positional arguments [#64127](https://github.com/ClickHouse/ClickHouse/pull/64127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Syncing code [#64135](https://github.com/ClickHouse/ClickHouse/pull/64135) ([Antonio Andelic](https://github.com/antonio2368)).
* Loosen build resource limits for unusual architectures [#64152](https://github.com/ClickHouse/ClickHouse/pull/64152) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* fix clang tidy [#64179](https://github.com/ClickHouse/ClickHouse/pull/64179) ([Han Fei](https://github.com/hanfei1991)).
* Fix global query profiler [#64187](https://github.com/ClickHouse/ClickHouse/pull/64187) ([Azat Khuzhin](https://github.com/azat)).
* CI: cancel running PR wf after adding to MQ [#64188](https://github.com/ClickHouse/ClickHouse/pull/64188) ([Max K.](https://github.com/maxknv)).
* Add debug logging to EmbeddedRocksDBBulkSink [#64203](https://github.com/ClickHouse/ClickHouse/pull/64203) ([vdimir](https://github.com/vdimir)).
* Fix special builds (due to excessive resource usage - memory/CPU) [#64204](https://github.com/ClickHouse/ClickHouse/pull/64204) ([Azat Khuzhin](https://github.com/azat)).
* Add gh to style-check dockerfile [#64227](https://github.com/ClickHouse/ClickHouse/pull/64227) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
* Followup for [#63691](https://github.com/ClickHouse/ClickHouse/issues/63691) [#64285](https://github.com/ClickHouse/ClickHouse/pull/64285) ([vdimir](https://github.com/vdimir)).
* Rename allow_deprecated_functions to allow_deprecated_error_prone_win… [#64358](https://github.com/ClickHouse/ClickHouse/pull/64358) ([Raúl Marín](https://github.com/Algunenano)).
* Update description for settings `cross_join_min_rows_to_compress` and `cross_join_min_bytes_to_compress` [#64360](https://github.com/ClickHouse/ClickHouse/pull/64360) ([Nikita Fomichev](https://github.com/fm4v)).
* Rename aggregate_function_group_array_has_limit_size [#64362](https://github.com/ClickHouse/ClickHouse/pull/64362) ([Raúl Marín](https://github.com/Algunenano)).
* Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts [#64363](https://github.com/ClickHouse/ClickHouse/pull/64363) ([Kruglov Pavel](https://github.com/Avogar)).
* Clean settings in 02943_variant_read_subcolumns test [#64437](https://github.com/ClickHouse/ClickHouse/pull/64437) ([Kruglov Pavel](https://github.com/Avogar)).
* CI: Critical bugfix category in PR template [#64480](https://github.com/ClickHouse/ClickHouse/pull/64480) ([Max K.](https://github.com/maxknv)).

View File

@ -71,7 +71,7 @@ If it fails, fix the style errors following the [code style guide](style.md).
```sh
mkdir -p /tmp/test_output
# running all checks
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE clickhouse/style-test
python3 tests/ci/style_check.py --no-push
# run specified check script (e.g.: ./check-mypy)
docker run --rm --volume=.:/ClickHouse --volume=/tmp/test_output:/test_output -u $(id -u ${USER}):$(id -g ${USER}) --cap-add=SYS_PTRACE --entrypoint= -w/ClickHouse/utils/check-style clickhouse/style-test ./check-mypy

View File

@ -37,7 +37,7 @@ ways, for example with respect to their DDL/DQL syntax or performance/compressio
To use full-text indexes, first enable them in the configuration:
```sql
SET allow_experimental_full_text_index = true;
```
A full-text index can be defined on a string column using the following syntax
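A rough sketch of what such a definition can look like (table, column, and index names here are hypothetical, and the exact parameters of the index type may differ from the documented syntax):
```sql
CREATE TABLE tab
(
    key UInt64,
    str String,
    INDEX inv_idx(str) TYPE full_text GRANULARITY 1
)
ENGINE = MergeTree
ORDER BY key;

-- token-based predicates, e.g. hasToken, can then be accelerated by the index
SELECT count() FROM tab WHERE hasToken(str, 'word');
```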

View File

@ -178,6 +178,10 @@ Additional parameters that control the behavior of the `MergeTree` (optional):
`max_partitions_to_read` — Limits the maximum number of partitions that can be accessed in one query. You can also specify the [max_partitions_to_read](/docs/en/operations/settings/merge-tree-settings.md/#max-partitions-to-read) setting globally.
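As a sketch (table and column names are hypothetical), the limit can be applied either as a table-level MergeTree setting or per query:
```sql
-- table-level MergeTree setting
CREATE TABLE hits
(
    EventDate Date,
    UserID UInt64
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(EventDate)
ORDER BY UserID
SETTINGS max_partitions_to_read = 12;

-- per-query setting
SELECT count() FROM hits SETTINGS max_partitions_to_read = 12;
```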
#### allow_experimental_optimized_row_order
`allow_experimental_optimized_row_order` - Experimental. Enables optimization of the row order during inserts to improve the compressibility of the data for compression codecs (e.g. LZ4). It analyzes and reorders the data, which increases the CPU overhead of inserts.
**Example of Sections Setting**
``` sql

View File

@ -7,6 +7,8 @@ sidebar_label: Configuration Files
# Configuration Files
The ClickHouse server can be configured with configuration files in XML or YAML syntax. In most installation types, the ClickHouse server runs with `/etc/clickhouse-server/config.xml` as default configuration file, but it is also possible to specify the location of the configuration file manually at server startup using command line option `--config-file=` or `-C`. Additional configuration files may be placed into directory `config.d/` relative to the main configuration file, for example into directory `/etc/clickhouse-server/config.d/`. Files in this directory and the main configuration are merged in a preprocessing step before the configuration is applied in ClickHouse server. Configuration files are merged in alphabetical order. To simplify updates and improve modularization, it is best practice to keep the default `config.xml` file unmodified and place additional customization into `config.d/`.
(The ClickHouse Keeper configuration lives in `/etc/clickhouse-keeper/keeper_config.xml` and thus the additional files need to be placed in `/etc/clickhouse-keeper/keeper_config.d/`.)
It is possible to mix XML and YAML configuration files, for example you could have a main configuration file `config.xml` and additional configuration files `config.d/network.xml`, `config.d/timezone.yaml` and `config.d/keeper.yaml`. Mixing XML and YAML within a single configuration file is not supported. XML configuration files should use `<clickhouse>...</clickhouse>` as top-level tag. In YAML configuration files, `clickhouse:` is optional, the parser inserts it implicitly if absent.

View File

@ -885,3 +885,47 @@ Default value: false
**See Also**
- [exclude_deleted_rows_for_part_size_in_merge](#exclude_deleted_rows_for_part_size_in_merge) setting
### allow_experimental_optimized_row_order
Controls whether the row order should be optimized during inserts to improve the compressibility of the newly inserted table part.
MergeTree tables are (optionally) compressed using [compression codecs](../../sql-reference/statements/create/table.md#column_compression_codec).
Generic compression codecs such as LZ4 and ZSTD achieve maximum compression rates if the data exposes patterns.
Long runs of the same value typically compress very well.
If this setting is enabled, ClickHouse attempts to store the data in newly inserted parts in a row order that minimizes the number of equal-value runs across the columns of the new table part.
In other words, a small number of equal-value runs means that individual runs are long and compress well.
Finding the optimal row order is computationally infeasible (NP-hard).
Therefore, ClickHouse uses heuristics to quickly find a row order which still improves compression rates over the original row order.
<details markdown="1">
<summary>Heuristics for finding a row order</summary>
It is generally possible to shuffle the rows of a table (or table part) freely as SQL considers the same table (table part) in different row order equivalent.
This freedom of shuffling rows is restricted when a primary key is defined for the table.
In ClickHouse, a primary key `C1, C2, ..., CN` enforces that the table rows are sorted by columns `C1`, `C2`, ... `Cn` ([clustered index](https://en.wikipedia.org/wiki/Database_index#Clustered)).
As a result, rows can only be shuffled within "equivalence classes" of rows, i.e. rows which have the same values in their primary key columns.
The intuition is that primary keys with high cardinality, e.g. primary keys involving a `DateTime64` timestamp column, lead to many small equivalence classes.
Likewise, tables with a low-cardinality primary key create few but large equivalence classes.
A table with no primary key represents the extreme case of a single equivalence class which spans all rows.
The fewer and the larger the equivalence classes are, the higher the degree of freedom when re-shuffling rows.
The heuristic applied to find the best row order within each equivalence class was suggested by D. Lemire and O. Kaser in [Reordering columns for smaller indexes](https://doi.org/10.1016/j.ins.2011.02.002) and is based on sorting the rows within each equivalence class by ascending cardinality of the non-primary-key columns.
It performs three steps:
1. Find all equivalence classes based on the row values in primary key columns.
2. For each equivalence class, calculate (usually estimate) the cardinalities of the non-primary-key columns.
3. For each equivalence class, sort the rows in order of ascending non-primary-key column cardinality.
</details>
If enabled, insert operations incur additional CPU costs to analyze and optimize the row order of the new data.
INSERTs are expected to take 30-50% longer depending on the data characteristics.
Compression rates of LZ4 or ZSTD improve on average by 20-40%.
This setting works best for tables with no primary key or a low-cardinality primary key, i.e. a table with only a few distinct primary key values.
High-cardinality primary keys, e.g. involving timestamp columns of type `DateTime64`, are not expected to benefit from this setting.
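A minimal, hypothetical example of enabling this MergeTree setting at table creation (table and column names are illustrative):
```sql
CREATE TABLE events
(
    event_type LowCardinality(String),
    user_id    UInt64,
    payload    String
)
ENGINE = MergeTree
ORDER BY event_type
SETTINGS allow_experimental_optimized_row_order = 1;
```
Newly inserted parts are then analyzed and reordered at insert time, at the cost of the additional CPU work described above.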

View File

@ -1956,7 +1956,7 @@ Possible values:
- Positive integer.
- 0 — Asynchronous insertions are disabled.
Default value: `10485760`.
### async_insert_max_query_number {#async-insert-max-query-number}

View File

@ -5,10 +5,57 @@ sidebar_position: 107
# corr
Calculates the [Pearson correlation coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient):
$$
\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{\sqrt{\Sigma{(x - \bar{x})^2} * \Sigma{(y - \bar{y})^2}}}
$$
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`corrStable`](../reference/corrstable.md) function. It is slower but provides a more accurate result.
:::
**Syntax**
```sql
corr(x, y)
```
**Arguments**
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
**Returned Value**
- The Pearson correlation coefficient. [Float64](../../data-types/float.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS series;
CREATE TABLE series
(
i UInt32,
x_value Float64,
y_value Float64
)
ENGINE = Memory;
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6, -4.4),(2, -9.6, 3),(3, -1.3, -4),(4, 5.3, 9.7),(5, 4.4, 0.037),(6, -8.6, -7.8),(7, 5.1, 9.3),(8, 7.9, -3.6),(9, -8.2, 0.62),(10, -3, 7.3);
```
```sql
SELECT corr(x_value, y_value)
FROM series;
```
Result:
```response
┌─corr(x_value, y_value)─┐
│ 0.1730265755453256 │
└────────────────────────┘
```

View File

@ -0,0 +1,55 @@
---
slug: /en/sql-reference/aggregate-functions/reference/corrmatrix
sidebar_position: 108
---
# corrMatrix
Computes the correlation matrix over N variables.
**Syntax**
```sql
corrMatrix(x[, ...])
```
**Arguments**
- `x` — a variable number of parameters. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
**Returned value**
- Correlation matrix. [Array](../../data-types/array.md)([Array](../../data-types/array.md)([Float64](../../data-types/float.md))).
**Example**
Query:
```sql
DROP TABLE IF EXISTS test;
CREATE TABLE test
(
a UInt32,
b Float64,
c Float64,
d Float64
)
ENGINE = Memory;
INSERT INTO test(a, b, c, d) VALUES (1, 5.6, -4.4, 2.6), (2, -9.6, 3, 3.3), (3, -1.3, -4, 1.2), (4, 5.3, 9.7, 2.3), (5, 4.4, 0.037, 1.222), (6, -8.6, -7.8, 2.1233), (7, 5.1, 9.3, 8.1222), (8, 7.9, -3.6, 9.837), (9, -8.2, 0.62, 8.43555), (10, -3, 7.3, 6.762);
```
```sql
SELECT arrayMap(x -> round(x, 3), arrayJoin(corrMatrix(a, b, c, d))) AS corrMatrix
FROM test;
```
Result:
```response
┌─corrMatrix─────────────┐
1. │ [1,-0.096,0.243,0.746] │
2. │ [-0.096,1,0.173,0.106] │
3. │ [0.243,0.173,1,0.258] │
4. │ [0.746,0.106,0.258,1] │
└────────────────────────┘
```

View File

@ -0,0 +1,58 @@
---
slug: /en/sql-reference/aggregate-functions/reference/corrstable
sidebar_position: 107
---
# corrStable
Calculates the [Pearson correlation coefficient](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient):
$$
\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{\sqrt{\Sigma{(x - \bar{x})^2} * \Sigma{(y - \bar{y})^2}}}
$$
Similar to the [`corr`](../reference/corr.md) function, but uses a numerically stable algorithm. As a result, `corrStable` is slower than `corr` but produces a more accurate result.
**Syntax**
```sql
corrStable(x, y)
```
**Arguments**
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
**Returned Value**
- The Pearson correlation coefficient. [Float64](../../data-types/float.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS series;
CREATE TABLE series
(
i UInt32,
x_value Float64,
y_value Float64
)
ENGINE = Memory;
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6, -4.4),(2, -9.6, 3),(3, -1.3, -4),(4, 5.3, 9.7),(5, 4.4, 0.037),(6, -8.6, -7.8),(7, 5.1, 9.3),(8, 7.9, -3.6),(9, -8.2, 0.62),(10, -3, 7.3);
```
```sql
SELECT corrStable(x_value, y_value)
FROM series;
```
Result:
```response
┌─corrStable(x_value, y_value)─┐
│ 0.17302657554532558 │
└──────────────────────────────┘
```

View File

@ -1,14 +1,54 @@
---
slug: /en/sql-reference/aggregate-functions/reference/covarpop
sidebar_position: 37
---
# covarPop
Calculates the population covariance:
$$
\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{n}
$$
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`covarPopStable`](../reference/covarpopstable.md) function. It works slower but provides a lower computational error.
:::
**Syntax**
```sql
covarPop(x, y)
```
**Arguments**
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
**Returned Value**
- The population covariance between `x` and `y`. [Float64](../../data-types/float.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS series;
CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory;
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6, -4.4),(2, -9.6, 3),(3, -1.3, -4),(4, 5.3, 9.7),(5, 4.4, 0.037),(6, -8.6, -7.8),(7, 5.1, 9.3),(8, 7.9, -3.6),(9, -8.2, 0.62),(10, -3, 7.3);
```
```sql
SELECT covarPop(x_value, y_value)
FROM series;
```
Result:
```response
┌─covarPop(x_value, y_value)─┐
│ 6.485648 │
└────────────────────────────┘
```

View File

@ -0,0 +1,55 @@
---
slug: /en/sql-reference/aggregate-functions/reference/covarpopmatrix
sidebar_position: 36
---
# covarPopMatrix
Returns the population covariance matrix over N variables.
**Syntax**
```sql
covarPopMatrix(x[, ...])
```
**Arguments**
- `x` — a variable number of parameters. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
**Returned Value**
- Population covariance matrix. [Array](../../data-types/array.md)([Array](../../data-types/array.md)([Float64](../../data-types/float.md))).
**Example**
Query:
```sql
DROP TABLE IF EXISTS test;
CREATE TABLE test
(
a UInt32,
b Float64,
c Float64,
d Float64
)
ENGINE = Memory;
INSERT INTO test(a, b, c, d) VALUES (1, 5.6, -4.4, 2.6), (2, -9.6, 3, 3.3), (3, -1.3, -4, 1.2), (4, 5.3, 9.7, 2.3), (5, 4.4, 0.037, 1.222), (6, -8.6, -7.8, 2.1233), (7, 5.1, 9.3, 8.1222), (8, 7.9, -3.6, 9.837), (9, -8.2, 0.62, 8.43555), (10, -3, 7.3, 6.762);
```
```sql
SELECT arrayMap(x -> round(x, 3), arrayJoin(covarPopMatrix(a, b, c, d))) AS covarPopMatrix
FROM test;
```
Result:
```response
┌─covarPopMatrix────────────┐
1. │ [8.25,-1.76,4.08,6.748] │
2. │ [-1.76,41.07,6.486,2.132] │
3. │ [4.08,6.486,34.21,4.755] │
4. │ [6.748,2.132,4.755,9.93] │
└───────────────────────────┘
```

View File

@ -0,0 +1,60 @@
---
slug: /en/sql-reference/aggregate-functions/reference/covarpopstable
sidebar_position: 36
---
# covarPopStable
Calculates the value of the population covariance:
$$
\frac{\Sigma{(x - \bar{x})(y - \bar{y})}}{n}
$$
It is similar to the [covarPop](../reference/covarpop.md) function, but uses a numerically stable algorithm. As a result, `covarPopStable` is slower than `covarPop` but produces a more accurate result.
**Syntax**
```sql
covarPopStable(x, y)
```
**Arguments**
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
**Returned Value**
- The population covariance between `x` and `y`. [Float64](../../data-types/float.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS series;
CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory;
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3);
```
```sql
SELECT covarPopStable(x_value, y_value)
FROM
(
SELECT
x_value,
y_value
FROM series
);
```
Result:
```response
┌─covarPopStable(x_value, y_value)─┐
│ 6.485648 │
└──────────────────────────────────┘
```

View File

@ -7,8 +7,74 @@ sidebar_position: 37
Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`.
Returns Float64. When `n <= 1`, returns `nan`.
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error.
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the [`covarSampStable`](../reference/covarsampstable.md) function. It works slower but provides a lower computational error.
:::
**Syntax**
```sql
covarSamp(x, y)
```
**Arguments**
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
**Returned Value**
- The sample covariance between `x` and `y`. For `n <= 1`, `nan` is returned. [Float64](../../data-types/float.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS series;
CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory;
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3);
```
```sql
SELECT covarSamp(x_value, y_value)
FROM
(
SELECT
x_value,
y_value
FROM series
);
```
Result:
```response
┌─covarSamp(x_value, y_value)─┐
│ 7.206275555555556 │
└─────────────────────────────┘
```
Query:
```sql
SELECT covarSamp(x_value, y_value)
FROM
(
SELECT
x_value,
y_value
FROM series LIMIT 1
);
```
Result:
```response
┌─covarSamp(x_value, y_value)─┐
│ nan │
└─────────────────────────────┘
```

View File

@ -0,0 +1,57 @@
---
slug: /en/sql-reference/aggregate-functions/reference/covarsampmatrix
sidebar_position: 38
---
# covarSampMatrix
Returns the sample covariance matrix over N variables.
**Syntax**
```sql
covarSampMatrix(x[, ...])
```
**Arguments**
- `x` — a variable number of parameters. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
**Returned Value**
- Sample covariance matrix. [Array](../../data-types/array.md)([Array](../../data-types/array.md)([Float64](../../data-types/float.md))).
**Example**
Query:
```sql
DROP TABLE IF EXISTS test;
CREATE TABLE test
(
a UInt32,
b Float64,
c Float64,
d Float64
)
ENGINE = Memory;
INSERT INTO test(a, b, c, d) VALUES (1, 5.6, -4.4, 2.6), (2, -9.6, 3, 3.3), (3, -1.3, -4, 1.2), (4, 5.3, 9.7, 2.3), (5, 4.4, 0.037, 1.222), (6, -8.6, -7.8, 2.1233), (7, 5.1, 9.3, 8.1222), (8, 7.9, -3.6, 9.837), (9, -8.2, 0.62, 8.43555), (10, -3, 7.3, 6.762);
```
```sql
SELECT arrayMap(x -> round(x, 3), arrayJoin(covarSampMatrix(a, b, c, d))) AS covarSampMatrix
FROM test;
```
Result:
```response
┌─covarSampMatrix─────────────┐
1. │ [9.167,-1.956,4.534,7.498] │
2. │ [-1.956,45.634,7.206,2.369] │
3. │ [4.534,7.206,38.011,5.283] │
4. │ [7.498,2.369,5.283,11.034] │
└─────────────────────────────┘
```

View File

@ -0,0 +1,73 @@
---
slug: /en/sql-reference/aggregate-functions/reference/covarsampstable
sidebar_position: 37
---
# covarSampStable
Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`. Similar to [covarSamp](../reference/covarsamp.md) but works slower while providing a lower computational error.
**Syntax**
```sql
covarSampStable(x, y)
```
**Arguments**
- `x` — first variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
- `y` — second variable. [(U)Int*](../../data-types/int-uint.md), [Float*](../../data-types/float.md), [Decimal](../../data-types/decimal.md).
**Returned Value**
- The sample covariance between `x` and `y`. For `n <= 1`, `inf` is returned. [Float64](../../data-types/float.md).
**Example**
Query:
```sql
DROP TABLE IF EXISTS series;
CREATE TABLE series(i UInt32, x_value Float64, y_value Float64) ENGINE = Memory;
INSERT INTO series(i, x_value, y_value) VALUES (1, 5.6,-4.4),(2, -9.6,3),(3, -1.3,-4),(4, 5.3,9.7),(5, 4.4,0.037),(6, -8.6,-7.8),(7, 5.1,9.3),(8, 7.9,-3.6),(9, -8.2,0.62),(10, -3,7.3);
```
```sql
SELECT covarSampStable(x_value, y_value)
FROM
(
SELECT
x_value,
y_value
FROM series
);
```
Result:
```response
┌─covarSampStable(x_value, y_value)─┐
│ 7.206275555555556 │
└───────────────────────────────────┘
```
Query:
```sql
SELECT covarSampStable(x_value, y_value)
FROM
(
SELECT
x_value,
y_value
FROM series LIMIT 1
);
```
Result:
```response
┌─covarSampStable(x_value, y_value)─┐
│ inf │
└───────────────────────────────────┘
```

View File

@ -9,110 +9,116 @@ toc_hidden: true
Standard aggregate functions:
- [count](/docs/en/sql-reference/aggregate-functions/reference/count.md)
- [min](/docs/en/sql-reference/aggregate-functions/reference/min.md)
- [max](/docs/en/sql-reference/aggregate-functions/reference/max.md)
- [sum](/docs/en/sql-reference/aggregate-functions/reference/sum.md)
- [avg](/docs/en/sql-reference/aggregate-functions/reference/avg.md)
- [any](/docs/en/sql-reference/aggregate-functions/reference/any.md)
- [stddevPop](/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md)
- [stddevPopStable](/docs/en/sql-reference/aggregate-functions/reference/stddevpopstable.md)
- [stddevSamp](/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md)
- [stddevSampStable](/docs/en/sql-reference/aggregate-functions/reference/stddevsampstable.md)
- [varPop](/docs/en/sql-reference/aggregate-functions/reference/varpop.md)
- [varSamp](/docs/en/sql-reference/aggregate-functions/reference/varsamp.md)
- [corr](./corr.md)
- [covarPop](/docs/en/sql-reference/aggregate-functions/reference/covarpop.md)
- [covarSamp](/docs/en/sql-reference/aggregate-functions/reference/covarsamp.md)
- [entropy](./entropy.md)
- [exponentialMovingAverage](./exponentialmovingaverage.md)
- [intervalLengthSum](./intervalLengthSum.md)
- [kolmogorovSmirnovTest](./kolmogorovsmirnovtest.md)
- [mannwhitneyutest](./mannwhitneyutest.md)
- [median](./median.md)
- [rankCorr](./rankCorr.md)
- [sumKahan](./sumkahan.md)
- [studentTTest](./studentttest.md)
- [welchTTest](./welchttest.md)
- [count](../reference/count.md)
- [min](../reference/min.md)
- [max](../reference/max.md)
- [sum](../reference/sum.md)
- [avg](../reference/avg.md)
- [any](../reference/any.md)
- [stddevPop](../reference/stddevpop.md)
- [stddevPopStable](../reference/stddevpopstable.md)
- [stddevSamp](../reference/stddevsamp.md)
- [stddevSampStable](../reference/stddevsampstable.md)
- [varPop](../reference/varpop.md)
- [varSamp](../reference/varsamp.md)
- [corr](../reference/corr.md)
- [corrStable](../reference/corrstable.md)
- [corrMatrix](../reference/corrmatrix.md)
- [covarPop](../reference/covarpop.md)
- [covarPopStable](../reference/covarpopstable.md)
- [covarPopMatrix](../reference/covarpopmatrix.md)
- [covarSamp](../reference/covarsamp.md)
- [covarSampStable](../reference/covarsampstable.md)
- [covarSampMatrix](../reference/covarsampmatrix.md)
- [entropy](../reference/entropy.md)
- [exponentialMovingAverage](../reference/exponentialmovingaverage.md)
- [intervalLengthSum](../reference/intervalLengthSum.md)
- [kolmogorovSmirnovTest](../reference/kolmogorovsmirnovtest.md)
- [mannwhitneyutest](../reference/mannwhitneyutest.md)
- [median](../reference/median.md)
- [rankCorr](../reference/rankCorr.md)
- [sumKahan](../reference/sumkahan.md)
- [studentTTest](../reference/studentttest.md)
- [welchTTest](../reference/welchttest.md)
ClickHouse-specific aggregate functions:
- [analysisOfVariance](/docs/en/sql-reference/aggregate-functions/reference/analysis_of_variance.md)
- [any](/docs/en/sql-reference/aggregate-functions/reference/any_respect_nulls.md)
- [anyHeavy](/docs/en/sql-reference/aggregate-functions/reference/anyheavy.md)
- [anyLast](/docs/en/sql-reference/aggregate-functions/reference/anylast.md)
- [anyLast](/docs/en/sql-reference/aggregate-functions/reference/anylast_respect_nulls.md)
- [boundingRatio](/docs/en/sql-reference/aggregate-functions/reference/boundrat.md)
- [first_value](/docs/en/sql-reference/aggregate-functions/reference/first_value.md)
- [last_value](/docs/en/sql-reference/aggregate-functions/reference/last_value.md)
- [argMin](/docs/en/sql-reference/aggregate-functions/reference/argmin.md)
- [argMax](/docs/en/sql-reference/aggregate-functions/reference/argmax.md)
- [avgWeighted](/docs/en/sql-reference/aggregate-functions/reference/avgweighted.md)
- [topK](/docs/en/sql-reference/aggregate-functions/reference/topk.md)
- [topKWeighted](/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md)
- [deltaSum](./deltasum.md)
- [deltaSumTimestamp](./deltasumtimestamp.md)
- [groupArray](/docs/en/sql-reference/aggregate-functions/reference/grouparray.md)
- [groupArrayLast](/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md)
- [groupUniqArray](/docs/en/sql-reference/aggregate-functions/reference/groupuniqarray.md)
- [groupArrayInsertAt](/docs/en/sql-reference/aggregate-functions/reference/grouparrayinsertat.md)
- [groupArrayMovingAvg](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingavg.md)
- [groupArrayMovingSum](/docs/en/sql-reference/aggregate-functions/reference/grouparraymovingsum.md)
- [groupArraySample](./grouparraysample.md)
- [groupArraySorted](/docs/en/sql-reference/aggregate-functions/reference/grouparraysorted.md)
- [groupArrayIntersect](./grouparrayintersect.md)
- [groupBitAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitand.md)
- [groupBitOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitor.md)
- [groupBitXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitxor.md)
- [groupBitmap](/docs/en/sql-reference/aggregate-functions/reference/groupbitmap.md)
- [groupBitmapAnd](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapand.md)
- [groupBitmapOr](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapor.md)
- [groupBitmapXor](/docs/en/sql-reference/aggregate-functions/reference/groupbitmapxor.md)
- [sumWithOverflow](/docs/en/sql-reference/aggregate-functions/reference/sumwithoverflow.md)
- [sumMap](/docs/en/sql-reference/aggregate-functions/reference/summap.md)
- [sumMapWithOverflow](/docs/en/sql-reference/aggregate-functions/reference/summapwithoverflow.md)
- [sumMapFiltered](/docs/en/sql-reference/aggregate-functions/parametric-functions.md/#summapfiltered)
- [sumMapFilteredWithOverflow](/docs/en/sql-reference/aggregate-functions/parametric-functions.md/#summapfilteredwithoverflow)
- [minMap](/docs/en/sql-reference/aggregate-functions/reference/minmap.md)
- [maxMap](/docs/en/sql-reference/aggregate-functions/reference/maxmap.md)
- [skewSamp](/docs/en/sql-reference/aggregate-functions/reference/skewsamp.md)
- [skewPop](/docs/en/sql-reference/aggregate-functions/reference/skewpop.md)
- [kurtSamp](/docs/en/sql-reference/aggregate-functions/reference/kurtsamp.md)
- [kurtPop](/docs/en/sql-reference/aggregate-functions/reference/kurtpop.md)
- [uniq](/docs/en/sql-reference/aggregate-functions/reference/uniq.md)
- [uniqExact](/docs/en/sql-reference/aggregate-functions/reference/uniqexact.md)
- [uniqCombined](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md)
- [uniqCombined64](/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md)
- [uniqHLL12](/docs/en/sql-reference/aggregate-functions/reference/uniqhll12.md)
- [uniqTheta](/docs/en/sql-reference/aggregate-functions/reference/uniqthetasketch.md)
- [quantile](/docs/en/sql-reference/aggregate-functions/reference/quantile.md)
- [quantiles](/docs/en/sql-reference/aggregate-functions/reference/quantiles.md)
- [quantileExact](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md)
- [quantileExactLow](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexactlow)
- [quantileExactHigh](/docs/en/sql-reference/aggregate-functions/reference/quantileexact.md#quantileexacthigh)
- [quantileExactWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantileexactweighted.md)
- [quantileTiming](/docs/en/sql-reference/aggregate-functions/reference/quantiletiming.md)
- [quantileTimingWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletimingweighted.md)
- [quantileDeterministic](/docs/en/sql-reference/aggregate-functions/reference/quantiledeterministic.md)
- [quantileTDigest](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigest.md)
- [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
- [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16)
- [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted)
- [quantileDD](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch)
- [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md)
- [singleValueOrNull](/docs/en/sql-reference/aggregate-functions/reference/singlevalueornull.md)
- [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
- [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)
- [categoricalInformationValue](/docs/en/sql-reference/aggregate-functions/reference/categoricalinformationvalue.md)
- [contingency](./contingency.md)
- [cramersV](./cramersv.md)
- [cramersVBiasCorrected](./cramersvbiascorrected.md)
- [theilsU](./theilsu.md)
- [maxIntersections](./maxintersections.md)
- [maxIntersectionsPosition](./maxintersectionsposition.md)
- [meanZTest](./meanztest.md)
- [quantileGK](./quantileGK.md)
- [quantileInterpolatedWeighted](./quantileinterpolatedweighted.md)
- [sparkBar](./sparkbar.md)
- [sumCount](./sumcount.md)
- [largestTriangleThreeBuckets](./largestTriangleThreeBuckets.md)
- [analysisOfVariance](../reference/analysis_of_variance.md)
- [any](../reference/any_respect_nulls.md)
- [anyHeavy](../reference/anyheavy.md)
- [anyLast](../reference/anylast.md)
- [anyLast](../reference/anylast_respect_nulls.md)
- [boundingRatio](../reference/boundrat.md)
- [first_value](../reference/first_value.md)
- [last_value](../reference/last_value.md)
- [argMin](../reference/argmin.md)
- [argMax](../reference/argmax.md)
- [avgWeighted](../reference/avgweighted.md)
- [topK](../reference/topk.md)
- [topKWeighted](../reference/topkweighted.md)
- [deltaSum](../reference/deltasum.md)
- [deltaSumTimestamp](../reference/deltasumtimestamp.md)
- [groupArray](../reference/grouparray.md)
- [groupArrayLast](../reference/grouparraylast.md)
- [groupUniqArray](../reference/groupuniqarray.md)
- [groupArrayInsertAt](../reference/grouparrayinsertat.md)
- [groupArrayMovingAvg](../reference/grouparraymovingavg.md)
- [groupArrayMovingSum](../reference/grouparraymovingsum.md)
- [groupArraySample](../reference/grouparraysample.md)
- [groupArraySorted](../reference/grouparraysorted.md)
- [groupArrayIntersect](../reference/grouparrayintersect.md)
- [groupBitAnd](../reference/groupbitand.md)
- [groupBitOr](../reference/groupbitor.md)
- [groupBitXor](../reference/groupbitxor.md)
- [groupBitmap](../reference/groupbitmap.md)
- [groupBitmapAnd](../reference/groupbitmapand.md)
- [groupBitmapOr](../reference/groupbitmapor.md)
- [groupBitmapXor](../reference/groupbitmapxor.md)
- [sumWithOverflow](../reference/sumwithoverflow.md)
- [sumMap](../reference/summap.md)
- [sumMapWithOverflow](../reference/summapwithoverflow.md)
- [sumMapFiltered](../parametric-functions.md/#summapfiltered)
- [sumMapFilteredWithOverflow](../parametric-functions.md/#summapfilteredwithoverflow)
- [minMap](../reference/minmap.md)
- [maxMap](../reference/maxmap.md)
- [skewSamp](../reference/skewsamp.md)
- [skewPop](../reference/skewpop.md)
- [kurtSamp](../reference/kurtsamp.md)
- [kurtPop](../reference/kurtpop.md)
- [uniq](../reference/uniq.md)
- [uniqExact](../reference/uniqexact.md)
- [uniqCombined](../reference/uniqcombined.md)
- [uniqCombined64](../reference/uniqcombined64.md)
- [uniqHLL12](../reference/uniqhll12.md)
- [uniqTheta](../reference/uniqthetasketch.md)
- [quantile](../reference/quantile.md)
- [quantiles](../reference/quantiles.md)
- [quantileExact](../reference/quantileexact.md)
- [quantileExactLow](../reference/quantileexact.md#quantileexactlow)
- [quantileExactHigh](../reference/quantileexact.md#quantileexacthigh)
- [quantileExactWeighted](../reference/quantileexactweighted.md)
- [quantileTiming](../reference/quantiletiming.md)
- [quantileTimingWeighted](../reference/quantiletimingweighted.md)
- [quantileDeterministic](../reference/quantiledeterministic.md)
- [quantileTDigest](../reference/quantiletdigest.md)
- [quantileTDigestWeighted](../reference/quantiletdigestweighted.md)
- [quantileBFloat16](../reference/quantilebfloat16.md#quantilebfloat16)
- [quantileBFloat16Weighted](../reference/quantilebfloat16.md#quantilebfloat16weighted)
- [quantileDD](../reference/quantileddsketch.md#quantileddsketch)
- [simpleLinearRegression](../reference/simplelinearregression.md)
- [singleValueOrNull](../reference/singlevalueornull.md)
- [stochasticLinearRegression](../reference/stochasticlinearregression.md)
- [stochasticLogisticRegression](../reference/stochasticlogisticregression.md)
- [categoricalInformationValue](../reference/categoricalinformationvalue.md)
- [contingency](../reference/contingency.md)
- [cramersV](../reference/cramersv.md)
- [cramersVBiasCorrected](../reference/cramersvbiascorrected.md)
- [theilsU](../reference/theilsu.md)
- [maxIntersections](../reference/maxintersections.md)
- [maxIntersectionsPosition](../reference/maxintersectionsposition.md)
- [meanZTest](../reference/meanztest.md)
- [quantileGK](../reference/quantileGK.md)
- [quantileInterpolatedWeighted](../reference/quantileinterpolatedweighted.md)
- [sparkBar](../reference/sparkbar.md)
- [sumCount](../reference/sumcount.md)
- [largestTriangleThreeBuckets](../reference/largestTriangleThreeBuckets.md)

View File

@ -167,7 +167,7 @@ Performs the opposite operation of [hex](#hex). It interprets each pair of hexad
If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs&lt;Type&gt;](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions) functions.
:::note
:::note
If `unhex` is invoked from within the `clickhouse-client`, binary strings display using UTF-8.
:::
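As a quick illustration of the conversion described above, here is a minimal sketch (the value `4095` is what the `reverse` plus `reinterpretAsUInt64` round-trip is expected to yield for `0xFFF`):

```sql
-- Interpret the hex string as a number: reverse() fixes the byte order
-- before the binary string is reinterpreted as a little-endian UInt64.
SELECT reinterpretAsUInt64(reverse(unhex('FFF'))) AS num;
-- Expected result: 4095
```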
@ -322,11 +322,11 @@ Alias: `UNBIN`.
For a numeric argument `unbin()` does not return the inverse of `bin()`. If you want to convert the result to a number, you can use the [reverse](../../sql-reference/functions/string-functions.md#reverse) and [reinterpretAs&lt;Type&gt;](../../sql-reference/functions/type-conversion-functions.md#reinterpretasuint8163264) functions.
:::note
:::note
If `unbin` is invoked from within the `clickhouse-client`, binary strings are displayed using UTF-8.
:::
Supports binary digits `0` and `1`. The number of binary digits does not have to be multiples of eight. If the argument string contains anything other than binary digits, some implementation-defined result is returned (an exception isnt thrown).
Supports binary digits `0` and `1`. The number of binary digits does not have to be multiples of eight. If the argument string contains anything other than binary digits, some implementation-defined result is returned (an exception isnt thrown).
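The numeric conversion mentioned above for `unbin` follows the same pattern; a minimal sketch (the expected value `10` assumes the binary string `'1010'`):

```sql
-- Convert a binary string back to a number via reverse() and reinterpretAsUInt64().
SELECT reinterpretAsUInt64(reverse(unbin('1010'))) AS num;
-- Expected result: 10
```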
**Arguments**
@ -482,7 +482,7 @@ mortonEncode(range_mask, args)
- `range_mask`: 1-8.
- `args`: up to 8 [unsigned integers](../data-types/int-uint.md) or columns of the aforementioned type.
Note: when using columns for `args` the provided `range_mask` tuple should still be a constant.
Note: when using columns for `args` the provided `range_mask` tuple should still be a constant.
**Returned value**
@ -626,7 +626,7 @@ Result:
Accepts a range mask (tuple) as a first argument and the code as the second argument.
Each number in the mask configures the amount of range shrink:<br/>
1 - no shrink<br/>
2 - 2x shrink<br/>
2 - 2x shrink<br/>
3 - 3x shrink<br/>
...<br/>
Up to 8x shrink.<br/>
@ -701,6 +701,267 @@ Result:
1 2 3 4 5 6 7 8
```
## hilbertEncode
Calculates a Hilbert curve code for a list of unsigned integers.
The function has two modes of operation:
- Simple
- Expanded
### Simple mode
Accepts up to 2 unsigned integers as arguments and produces a UInt64 code.
**Syntax**
```sql
hilbertEncode(args)
```
**Parameters**
- `args`: up to 2 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type.
**Returned value**
- A UInt64 code
Type: [UInt64](../../sql-reference/data-types/int-uint.md)
**Example**
Query:
```sql
SELECT hilbertEncode(3, 4);
```
Result:
```response
31
```
### Expanded mode
Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 2 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments.
Each number in the mask configures the number of bits by which the corresponding argument will be shifted left, effectively scaling the argument within its range.
**Syntax**
```sql
hilbertEncode(range_mask, args)
```
**Parameters**
- `range_mask`: ([tuple](../../sql-reference/data-types/tuple.md))
- `args`: up to 2 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type.
Note: when using columns for `args` the provided `range_mask` tuple should still be a constant.
**Returned value**
- A UInt64 code
Type: [UInt64](../../sql-reference/data-types/int-uint.md)
**Example**
Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality).
For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF).
Query:
```sql
SELECT hilbertEncode((10,6), 1024, 16);
```
Result:
```response
4031541586602
```
Note: tuple size must be equal to the number of the other arguments.
**Example**
For a single argument without a tuple, the function returns the argument itself as the Hilbert index, since no dimensional mapping is needed.
Query:
```sql
SELECT hilbertEncode(1);
```
Result:
```response
1
```
**Example**
If a single argument is provided with a tuple specifying bit shifts, the function shifts the argument left by the specified number of bits.
Query:
```sql
SELECT hilbertEncode(tuple(2), 128);
```
Result:
```response
512
```
**Example**
The function also accepts columns as arguments:
Query:
First create the table and insert some data.
```sql
CREATE TABLE hilbert_numbers(
    n1 UInt32,
    n2 UInt32
)
ENGINE = MergeTree()
ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
INSERT INTO hilbert_numbers (*) VALUES (1, 2);
```
Use column names instead of constants as function arguments to `hilbertEncode`:
Query:
```sql
SELECT hilbertEncode(n1, n2) FROM hilbert_numbers;
```
Result:
```response
13
```
**Implementation details**
Note that a Hilbert code can hold only as many bits of information as a [UInt64](../../sql-reference/data-types/int-uint.md) has. With two arguments, each argument has a maximum range of 2^32 (64/2 bits). Values that overflow this range are clamped to zero.
## hilbertDecode
Decodes a Hilbert curve index back into a tuple of unsigned integers, representing coordinates in multi-dimensional space.
As with the `hilbertEncode` function, this function has two modes of operation:
- Simple
- Expanded
### Simple mode
Accepts a tuple size of up to 2 and a UInt64 code, and produces a tuple of unsigned integer coordinates.
**Syntax**
```sql
hilbertDecode(tuple_size, code)
```
**Parameters**
- `tuple_size`: integer value no more than 2.
- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code.
**Returned value**
- [tuple](../../sql-reference/data-types/tuple.md) of the specified size.
Type: [UInt64](../../sql-reference/data-types/int-uint.md)
**Example**
Query:
```sql
SELECT hilbertDecode(2, 31);
```
Result:
```response
["3", "4"]
```
### Expanded mode
Accepts a range mask (tuple) as a first argument and up to 2 unsigned integers as other arguments.
Each number in the mask configures the number of bits by which the corresponding argument will be shifted left, effectively scaling the argument within its range.
Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality).
For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF).
As with the encode function, this is limited to 2 numbers at most.
**Example**
Hilbert code for one argument is always the argument itself (as a tuple).
Query:
```sql
SELECT hilbertDecode(1, 1);
```
Result:
```response
["1"]
```
**Example**
A single argument with a tuple specifying bit shifts will be right-shifted accordingly.
Query:
```sql
SELECT hilbertDecode(tuple(2), 32768);
```
Result:
```response
["128"]
```
**Example**
The function accepts a column of codes as a second argument:
First create the table and insert some data.
Query:
```sql
CREATE TABLE hilbert_numbers(
    n1 UInt32,
    n2 UInt32
)
ENGINE = MergeTree()
ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
INSERT INTO hilbert_numbers (*) VALUES (1, 2);
```
Use column names instead of constants as function arguments to `hilbertDecode`:
Query:
```sql
SELECT untuple(hilbertDecode(2, hilbertEncode(n1, n2))) FROM hilbert_numbers;
```
Result:
```response
1 2
```

View File

@ -415,8 +415,8 @@ Alias: `power(x, y)`
**Arguments**
- `x` - [(U)Int8/16/32/64](../data-types/int-uint.md) or [Float*](../data-types/float.md)
- `y` - [(U)Int8/16/32/64](../data-types/int-uint.md) or [Float*](../data-types/float.md)
- `x` - [(U)Int8/16/32/64](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md)
- `y` - [(U)Int8/16/32/64](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md)
**Returned value**
@ -635,8 +635,8 @@ atan2(y, x)
**Arguments**
- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
- `y` — y-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
- `x` — x-coordinate of the point through which the ray passes. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
**Returned value**
@ -670,8 +670,8 @@ hypot(x, y)
**Arguments**
- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md).
- `x` — The first cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
- `y` — The second cathetus of a right-angle triangle. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
**Returned value**
@ -838,6 +838,7 @@ degrees(x)
**Arguments**
- `x` — Input in radians. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
- `x` — Input in radians. [(U)Int*](../data-types/int-uint.md), [Float*](../data-types/float.md) or [Decimal*](../data-types/decimal.md).
**Returned value**

View File

@ -735,6 +735,8 @@ LIMIT 10
Given a size (number of bytes), this function returns a readable, rounded size with suffix (KB, MB, etc.) as string.
The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull).
**Syntax**
```sql
@ -766,6 +768,8 @@ Result:
Given a size (number of bytes), this function returns a readable, rounded size with suffix (KiB, MiB, etc.) as string.
The opposite operations of this function are [parseReadableSize](#parseReadableSize), [parseReadableSizeOrZero](#parseReadableSizeOrZero), and [parseReadableSizeOrNull](#parseReadableSizeOrNull).
**Syntax**
```sql
@ -890,6 +894,122 @@ SELECT
└────────────────────┴────────────────────────────────────────────────┘
```
## parseReadableSize
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes.
If the function is unable to parse the input value, it throws an exception.
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
**Syntax**
```sql
parseReadableSize(x)
```
**Arguments**
- `x` : Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md)).
**Returned value**
- Number of bytes, rounded up to the nearest integer ([UInt64](../../sql-reference/data-types/int-uint.md)).
**Example**
```sql
SELECT
arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB']) AS readable_sizes,
parseReadableSize(readable_sizes) AS sizes;
```
```text
┌─readable_sizes─┬───sizes─┐
│ 1 B │ 1 │
│ 1 KiB │ 1024 │
│ 3 MB │ 3000000 │
│ 5.314 KiB │ 5442 │
└────────────────┴─────────┘
```
## parseReadableSizeOrNull
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes.
If the function is unable to parse the input value, it returns `NULL`.
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
**Syntax**
```sql
parseReadableSizeOrNull(x)
```
**Arguments**
- `x` : Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md)).
**Returned value**
- Number of bytes, rounded up to the nearest integer, or NULL if unable to parse the input (Nullable([UInt64](../../sql-reference/data-types/int-uint.md))).
**Example**
```sql
SELECT
arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB', 'invalid']) AS readable_sizes,
parseReadableSizeOrNull(readable_sizes) AS sizes;
```
```text
┌─readable_sizes─┬───sizes─┐
│ 1 B │ 1 │
│ 1 KiB │ 1024 │
│ 3 MB │ 3000000 │
│ 5.314 KiB │ 5442 │
│ invalid │ ᴺᵁᴸᴸ │
└────────────────┴─────────┘
```
## parseReadableSizeOrZero
Given a string containing a byte size and `B`, `KiB`, `KB`, `MiB`, `MB`, etc. as a unit (i.e. [ISO/IEC 80000-13](https://en.wikipedia.org/wiki/ISO/IEC_80000) or decimal byte unit), this function returns the corresponding number of bytes. If the function is unable to parse the input value, it returns `0`.
The inverse operations of this function are [formatReadableSize](#formatReadableSize) and [formatReadableDecimalSize](#formatReadableDecimalSize).
**Syntax**
```sql
parseReadableSizeOrZero(x)
```
**Arguments**
- `x` : Readable size with ISO/IEC 80000-13 or decimal byte unit ([String](../../sql-reference/data-types/string.md)).
**Returned value**
- Number of bytes, rounded up to the nearest integer, or 0 if unable to parse the input ([UInt64](../../sql-reference/data-types/int-uint.md)).
**Example**
```sql
SELECT
arrayJoin(['1 B', '1 KiB', '3 MB', '5.314 KiB', 'invalid']) AS readable_sizes,
parseReadableSizeOrZero(readable_sizes) AS sizes;
```
```text
┌─readable_sizes─┬───sizes─┐
│ 1 B │ 1 │
│ 1 KiB │ 1024 │
│ 3 MB │ 3000000 │
│ 5.314 KiB │ 5442 │
│ invalid │ 0 │
└────────────────┴─────────┘
```
## parseTimeDelta
Parse a sequence of numbers followed by something resembling a time unit.
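A minimal usage sketch (the expected value assumes the parsed units are summed into seconds, so `11 s + 22 min = 1331`):

```sql
-- Parse a human-readable duration into a number of seconds.
SELECT parseTimeDelta('11s+22min') AS seconds;
-- Expected result: 1331
```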

View File

@ -337,7 +337,7 @@ Then, when executing the query `SELECT name FROM users_a WHERE length(name) < 5;
Defines storage time for values. Can be specified only for MergeTree-family tables. For the detailed description, see [TTL for columns and tables](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
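As an illustration of a column-level TTL, a minimal sketch with hypothetical table and column names:

```sql
-- The `payload` column is reset to its default one month after `event_time`.
CREATE TABLE ttl_example
(
    event_time DateTime,
    payload String TTL event_time + INTERVAL 1 MONTH
)
ENGINE = MergeTree
ORDER BY event_time;
```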
## Column Compression Codecs
## Column Compression Codecs {#column_compression_codec}
By default, ClickHouse applies `lz4` compression in the self-managed version, and `zstd` in ClickHouse Cloud.
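To override the default for an individual column, a codec can be specified in the column definition; a minimal sketch with hypothetical table and column names:

```sql
-- Store `message` with ZSTD at compression level 3 instead of the default codec.
CREATE TABLE codec_example
(
    id UInt64,
    message String CODEC(ZSTD(3))
)
ENGINE = MergeTree
ORDER BY id;
```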
@ -410,6 +410,10 @@ High compression levels are useful for asymmetric scenarios, like compress once,
- For compression, ZSTD_QAT tries to use an Intel® QAT offloading device ([QuickAssist Technology](https://www.intel.com/content/www/us/en/developer/topic-technology/open/quick-assist-technology/overview.html)). If no such device is found, it falls back to ZSTD compression in software.
- Decompression is always performed in software.
:::note
ZSTD_QAT is not available in ClickHouse Cloud.
:::
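A minimal sketch of selecting this codec for a column, assuming `ZSTD_QAT` accepts an optional compression level in the same way as `ZSTD` (table and column names are hypothetical, and the server must be built with QAT support):

```sql
-- Falls back to software ZSTD if no QAT device is available.
CREATE TABLE qat_example
(
    id UInt64,
    body String CODEC(ZSTD_QAT(1))
)
ENGINE = MergeTree
ORDER BY id;
```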
#### DEFLATE_QPL
`DEFLATE_QPL` — [Deflate compression algorithm](https://github.com/intel/qpl) implemented by Intel® Query Processing Library. Some limitations apply:

View File

@ -304,8 +304,8 @@ atan2(y, x)
**Arguments**
- `y` — y-coordinate of the point the line passes through. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `x` — x-coordinate of the point the line passes through. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `y` — y-coordinate of the point the line passes through. [Float64](../../sql-reference/data-types/float.md#float32-float64) or [Decimal](../../sql-reference/data-types/decimal.md).
- `x` — x-coordinate of the point the line passes through. [Float64](../../sql-reference/data-types/float.md#float32-float64) or [Decimal](../../sql-reference/data-types/decimal.md).
**Returned value**
@ -341,8 +341,8 @@ hypot(x, y)
**Arguments**
- `x` — the first cathetus of a right triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `y` — the second cathetus of a right triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64).
- `x` — the first cathetus of a right triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64) or [Decimal](../../sql-reference/data-types/decimal.md).
- `y` — the second cathetus of a right triangle. [Float64](../../sql-reference/data-types/float.md#float32-float64) or [Decimal](../../sql-reference/data-types/decimal.md).
**Returned value**

View File

@ -154,7 +154,8 @@ function _clickhouse_quote()
# Extract every option (everything that starts with "-") from the --help dialog.
function _clickhouse_get_options()
{
"$@" --help 2>&1 | awk -F '[ ,=<>.]' '{ for (i=1; i <= NF; ++i) { if (substr($i, 1, 1) == "-" && length($i) > 1) print $i; } }' | sort -u
# By default, --help does not print all settings; they are printed only with --verbose
"$@" --help --verbose 2>&1 | awk -F '[ ,=<>.]' '{ for (i=1; i <= NF; ++i) { if (substr($i, 1, 1) == "-" && length($i) > 1) print $i; } }' | sort -u
}
function _complete_for_clickhouse_generic_bin_impl()

View File

@ -11,7 +11,6 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int KEEPER_EXCEPTION;
}
bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
@ -214,6 +213,143 @@ void GetStatCommand::execute(const ASTKeeperQuery * query, KeeperClient * client
std::cout << "numChildren = " << stat.numChildren << "\n";
}
namespace
{
/// Helper class for parallelized tree traversal
template <class UserCtx>
struct TraversalTask : public std::enable_shared_from_this<TraversalTask<UserCtx>>
{
using TraversalTaskPtr = std::shared_ptr<TraversalTask<UserCtx>>;
struct Ctx
{
std::deque<TraversalTaskPtr> new_tasks; /// Tasks for newly discovered children that haven't been started yet
std::deque<std::function<void(Ctx &)>> in_flight_list_requests; /// In-flight getChildren requests
std::deque<std::function<void(Ctx &)>> finish_callbacks; /// Callbacks to be called when a subtree traversal finishes
KeeperClient * client;
UserCtx & user_ctx;
Ctx(KeeperClient * client_, UserCtx & user_ctx_) : client(client_), user_ctx(user_ctx_) {}
};
private:
const fs::path path;
const TraversalTaskPtr parent;
Int64 child_tasks = 0;
Int64 nodes_in_subtree = 1;
public:
TraversalTask(const fs::path & path_, TraversalTaskPtr parent_)
: path(path_)
, parent(parent_)
{
}
/// Start traversing the subtree
void onStart(Ctx & ctx)
{
/// tryGetChildren doesn't throw if the node is not found (was deleted in the meantime)
std::shared_ptr<std::future<Coordination::ListResponse>> list_request =
std::make_shared<std::future<Coordination::ListResponse>>(ctx.client->zookeeper->asyncTryGetChildren(path));
ctx.in_flight_list_requests.push_back([task = this->shared_from_this(), list_request](Ctx & ctx_) mutable
{
task->onGetChildren(ctx_, list_request->get());
});
}
/// Called when getChildren request returns
void onGetChildren(Ctx & ctx, const Coordination::ListResponse & response)
{
const bool traverse_children = ctx.user_ctx.onListChildren(path, response.names);
if (traverse_children)
{
/// Schedule traversal of each child
for (const auto & child : response.names)
{
auto task = std::make_shared<TraversalTask>(path / child, this->shared_from_this());
ctx.new_tasks.push_back(task);
}
child_tasks = response.names.size();
}
if (child_tasks == 0)
finish(ctx);
}
/// Called when a child subtree has been traversed
void onChildTraversalFinished(Ctx & ctx, Int64 child_nodes_in_subtree)
{
nodes_in_subtree += child_nodes_in_subtree;
--child_tasks;
/// Finish if all children have been traversed
if (child_tasks == 0)
finish(ctx);
}
private:
/// This node and all its children have been traversed
void finish(Ctx & ctx)
{
ctx.user_ctx.onFinishChildrenTraversal(path, nodes_in_subtree);
if (!parent)
return;
/// Notify the parent that we have finished traversing the subtree
ctx.finish_callbacks.push_back([p = this->parent, child_nodes_in_subtree = this->nodes_in_subtree](Ctx & ctx_)
{
p->onChildTraversalFinished(ctx_, child_nodes_in_subtree);
});
}
};
/// Traverses the tree in parallel and calls user callbacks
/// Parallelization is achieved by sending multiple async getChildren requests to Keeper, but all processing is done in a single thread
template <class UserCtx>
void parallelized_traverse(const fs::path & path, KeeperClient * client, size_t max_in_flight_requests, UserCtx & ctx_)
{
typename TraversalTask<UserCtx>::Ctx ctx(client, ctx_);
auto root_task = std::make_shared<TraversalTask<UserCtx>>(path, nullptr);
ctx.new_tasks.push_back(root_task);
/// Until there is something to do
while (!ctx.new_tasks.empty() || !ctx.in_flight_list_requests.empty() || !ctx.finish_callbacks.empty())
{
/// First process all finish callbacks, they don't wait for anything and allow to free memory
while (!ctx.finish_callbacks.empty())
{
auto callback = std::move(ctx.finish_callbacks.front());
ctx.finish_callbacks.pop_front();
callback(ctx);
}
/// Make new requests if there are less than max in flight
while (!ctx.new_tasks.empty() && ctx.in_flight_list_requests.size() < max_in_flight_requests)
{
auto task = std::move(ctx.new_tasks.front());
ctx.new_tasks.pop_front();
task->onStart(ctx);
}
/// Wait for first request in the queue to finish
if (!ctx.in_flight_list_requests.empty())
{
auto request = std::move(ctx.in_flight_list_requests.front());
ctx.in_flight_list_requests.pop_front();
request(ctx);
}
}
}
} /// anonymous namespace
bool FindSuperNodes::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
ASTPtr threshold;
@ -237,27 +373,21 @@ void FindSuperNodes::execute(const ASTKeeperQuery * query, KeeperClient * client
auto threshold = query->args[0].safeGet<UInt64>();
auto path = client->getAbsolutePath(query->args[1].safeGet<String>());
Coordination::Stat stat;
if (!client->zookeeper->exists(path, &stat))
return; /// It is ok if node was deleted meanwhile
if (stat.numChildren >= static_cast<Int32>(threshold))
std::cout << static_cast<String>(path) << "\t" << stat.numChildren << "\n";
Strings children;
auto status = client->zookeeper->tryGetChildren(path, children);
if (status == Coordination::Error::ZNONODE)
return; /// It is ok if node was deleted meanwhile
else if (status != Coordination::Error::ZOK)
throw DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, "Error {} while getting children of {}", status, path.string());
std::sort(children.begin(), children.end());
auto next_query = *query;
for (const auto & child : children)
struct
{
next_query.args[1] = DB::Field(path / child);
execute(&next_query, client);
}
bool onListChildren(const fs::path & path, const Strings & children) const
{
if (children.size() >= threshold)
std::cout << static_cast<String>(path) << "\t" << children.size() << "\n";
return true;
}
void onFinishChildrenTraversal(const fs::path &, Int64) const {}
size_t threshold;
} ctx {.threshold = threshold };
parallelized_traverse(path, client, /* max_in_flight_requests */ 50, ctx);
}
bool DeleteStaleBackups::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery> & /* node */, Expected & /* expected */) const
@ -322,38 +452,28 @@ bool FindBigFamily::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> &
return true;
}
/// DFS the subtree and return the number of nodes in the subtree
static Int64 traverse(const fs::path & path, KeeperClient * client, std::vector<std::tuple<Int64, String>> & result)
{
Int64 nodes_in_subtree = 1;
Strings children;
auto status = client->zookeeper->tryGetChildren(path, children);
if (status == Coordination::Error::ZNONODE)
return 0;
else if (status != Coordination::Error::ZOK)
throw DB::Exception(DB::ErrorCodes::KEEPER_EXCEPTION, "Error {} while getting children of {}", status, path.string());
for (auto & child : children)
nodes_in_subtree += traverse(path / child, client, result);
result.emplace_back(nodes_in_subtree, path.string());
return nodes_in_subtree;
}
void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client) const
{
auto path = client->getAbsolutePath(query->args[0].safeGet<String>());
auto n = query->args[1].safeGet<UInt64>();
std::vector<std::tuple<Int64, String>> result;
struct
{
std::vector<std::tuple<Int64, String>> result;
traverse(path, client, result);
bool onListChildren(const fs::path &, const Strings &) const { return true; }
std::sort(result.begin(), result.end(), std::greater());
for (UInt64 i = 0; i < std::min(result.size(), static_cast<size_t>(n)); ++i)
std::cout << std::get<1>(result[i]) << "\t" << std::get<0>(result[i]) << "\n";
void onFinishChildrenTraversal(const fs::path & path, Int64 nodes_in_subtree)
{
result.emplace_back(nodes_in_subtree, path.string());
}
} ctx;
parallelized_traverse(path, client, /* max_in_flight_requests */ 50, ctx);
std::sort(ctx.result.begin(), ctx.result.end(), std::greater());
for (UInt64 i = 0; i < std::min(ctx.result.size(), static_cast<size_t>(n)); ++i)
std::cout << std::get<1>(ctx.result[i]) << "\t" << std::get<0>(ctx.result[i]) << "\n";
}
bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const

View File

@ -9,8 +9,6 @@ set (CLICKHOUSE_KEEPER_LINK
clickhouse_common_zookeeper
daemon
dbms
${LINK_RESOURCE_LIB}
)
clickhouse_program_add(keeper)
@ -210,8 +208,6 @@ if (BUILD_STANDALONE_KEEPER)
loggers_no_text_log
clickhouse_common_io
clickhouse_parsers # Otherwise compression will not built. FIXME.
${LINK_RESOURCE_LIB_STANDALONE_KEEPER}
)
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)

View File

@ -14,8 +14,6 @@ set (CLICKHOUSE_SERVER_LINK
clickhouse_storages_system
clickhouse_table_functions
${LINK_RESOURCE_LIB}
PUBLIC
daemon
)

File diff suppressed because it is too large

View File

@ -0,0 +1,157 @@
#pragma once
#include <Analyzer/HashUtils.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/Resolve/IdentifierLookup.h>
#include <Core/Joins.h>
#include <Core/NamesAndTypes.h>
#include <Interpreters/Context_fwd.h>
#include <Parsers/NullsAction.h>
namespace DB
{
struct GetColumnsOptions;
struct IdentifierResolveScope;
struct AnalysisTableExpressionData;
class QueryExpressionsAliasVisitor;
class QueryNode;
class JoinNode;
class ColumnNode;
using ProjectionName = String;
using ProjectionNames = std::vector<ProjectionName>;
struct Settings;
class IdentifierResolver
{
public:
IdentifierResolver(
std::unordered_set<std::string_view> & ctes_in_resolve_process_,
std::unordered_map<QueryTreeNodePtr, ProjectionName> & node_to_projection_name_)
: ctes_in_resolve_process(ctes_in_resolve_process_)
, node_to_projection_name(node_to_projection_name_)
{}
/// Utility functions
static bool isExpressionNodeType(QueryTreeNodeType node_type);
static bool isFunctionExpressionNodeType(QueryTreeNodeType node_type);
static bool isSubqueryNodeType(QueryTreeNodeType node_type);
static bool isTableExpressionNodeType(QueryTreeNodeType node_type);
static DataTypePtr getExpressionNodeResultTypeOrNull(const QueryTreeNodePtr & query_tree_node);
static void collectCompoundExpressionValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
const DataTypePtr & compound_expression_type,
const Identifier & valid_identifier_prefix,
std::unordered_set<Identifier> & valid_identifiers_result);
static void collectTableExpressionValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
const QueryTreeNodePtr & table_expression,
const AnalysisTableExpressionData & table_expression_data,
std::unordered_set<Identifier> & valid_identifiers_result);
static void collectScopeValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
const IdentifierResolveScope & scope,
bool allow_expression_identifiers,
bool allow_function_identifiers,
bool allow_table_expression_identifiers,
std::unordered_set<Identifier> & valid_identifiers_result);
static void collectScopeWithParentScopesValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
const IdentifierResolveScope & scope,
bool allow_expression_identifiers,
bool allow_function_identifiers,
bool allow_table_expression_identifiers,
std::unordered_set<Identifier> & valid_identifiers_result);
static std::vector<String> collectIdentifierTypoHints(const Identifier & unresolved_identifier, const std::unordered_set<Identifier> & valid_identifiers);
static QueryTreeNodePtr wrapExpressionNodeInTupleElement(QueryTreeNodePtr expression_node, IdentifierView nested_path, const ContextPtr & context);
static QueryTreeNodePtr convertJoinedColumnTypeToNullIfNeeded(
const QueryTreeNodePtr & resolved_identifier,
const JoinKind & join_kind,
std::optional<JoinTableSide> resolved_side,
IdentifierResolveScope & scope);
/// Resolve identifier functions
static QueryTreeNodePtr tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context);
QueryTreeNodePtr tryResolveIdentifierFromCompoundExpression(const Identifier & expression_identifier,
size_t identifier_bind_size,
const QueryTreeNodePtr & compound_expression,
String compound_expression_source,
IdentifierResolveScope & scope,
bool can_be_not_found = false);
QueryTreeNodePtr tryResolveIdentifierFromExpressionArguments(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
static bool tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromTableColumns(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
static bool tryBindIdentifierToTableExpression(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
const IdentifierResolveScope & scope);
static bool tryBindIdentifierToTableExpressions(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
const IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromTableExpression(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr matchArrayJoinSubcolumns(
const QueryTreeNodePtr & array_join_column_inner_expression,
const ColumnNode & array_join_column_expression_typed,
const QueryTreeNodePtr & resolved_expression,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveExpressionFromArrayJoinExpressions(const QueryTreeNodePtr & resolved_expression,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromArrayJoin(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromJoinTreeNode(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & join_tree_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromJoinTree(const IdentifierLookup & identifier_lookup,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromStorage(
const Identifier & identifier,
const QueryTreeNodePtr & table_expression_node,
const AnalysisTableExpressionData & table_expression_data,
IdentifierResolveScope & scope,
size_t identifier_column_qualifier_parts,
bool can_be_not_found = false);
/// CTEs that are currently in resolve process
std::unordered_set<std::string_view> & ctes_in_resolve_process;
/// Global expression node to projection name map
std::unordered_map<QueryTreeNodePtr, ProjectionName> & node_to_projection_name;
};
}

File diff suppressed because it is too large

View File

@ -4,6 +4,7 @@
#include <Analyzer/HashUtils.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/Resolve/IdentifierLookup.h>
#include <Analyzer/Resolve/IdentifierResolver.h>
#include <Core/Joins.h>
#include <Core/NamesAndTypes.h>
@ -121,16 +122,6 @@ public:
private:
/// Utility functions
static bool isExpressionNodeType(QueryTreeNodeType node_type);
static bool isFunctionExpressionNodeType(QueryTreeNodeType node_type);
static bool isSubqueryNodeType(QueryTreeNodeType node_type);
static bool isTableExpressionNodeType(QueryTreeNodeType node_type);
static DataTypePtr getExpressionNodeResultTypeOrNull(const QueryTreeNodePtr & query_tree_node);
static ProjectionName calculateFunctionProjectionName(const QueryTreeNodePtr & function_node,
const ProjectionNames & parameters_projection_names,
const ProjectionNames & arguments_projection_names);
@ -149,34 +140,6 @@ private:
const ProjectionName & fill_to_expression_projection_name,
const ProjectionName & fill_step_expression_projection_name);
static void collectCompoundExpressionValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
const DataTypePtr & compound_expression_type,
const Identifier & valid_identifier_prefix,
std::unordered_set<Identifier> & valid_identifiers_result);
static void collectTableExpressionValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
const QueryTreeNodePtr & table_expression,
const AnalysisTableExpressionData & table_expression_data,
std::unordered_set<Identifier> & valid_identifiers_result);
static void collectScopeValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
const IdentifierResolveScope & scope,
bool allow_expression_identifiers,
bool allow_function_identifiers,
bool allow_table_expression_identifiers,
std::unordered_set<Identifier> & valid_identifiers_result);
static void collectScopeWithParentScopesValidIdentifiersForTypoCorrection(const Identifier & unresolved_identifier,
const IdentifierResolveScope & scope,
bool allow_expression_identifiers,
bool allow_function_identifiers,
bool allow_table_expression_identifiers,
std::unordered_set<Identifier> & valid_identifiers_result);
static std::vector<String> collectIdentifierTypoHints(const Identifier & unresolved_identifier, const std::unordered_set<Identifier> & valid_identifiers);
static QueryTreeNodePtr wrapExpressionNodeInTupleElement(QueryTreeNodePtr expression_node, IdentifierView nested_path);
QueryTreeNodePtr tryGetLambdaFromSQLUserDefinedFunctions(const std::string & function_name, ContextPtr context);
void evaluateScalarSubqueryIfNeeded(QueryTreeNodePtr & query_tree_node, IdentifierResolveScope & scope);
@ -204,84 +167,18 @@ private:
static std::optional<JoinTableSide> getColumnSideFromJoinTree(const QueryTreeNodePtr & resolved_identifier, const JoinNode & join_node);
static QueryTreeNodePtr convertJoinedColumnTypeToNullIfNeeded(
const QueryTreeNodePtr & resolved_identifier,
const JoinKind & join_kind,
std::optional<JoinTableSide> resolved_side,
IdentifierResolveScope & scope);
/// Resolve identifier functions
static QueryTreeNodePtr tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context);
QueryTreeNodePtr tryResolveIdentifierFromCompoundExpression(const Identifier & expression_identifier,
size_t identifier_bind_size,
const QueryTreeNodePtr & compound_expression,
String compound_expression_source,
IdentifierResolveScope & scope,
bool can_be_not_found = false);
QueryTreeNodePtr tryResolveIdentifierFromExpressionArguments(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
static bool tryBindIdentifierToAliases(const IdentifierLookup & identifier_lookup, const IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromAliases(const IdentifierLookup & identifier_lookup,
IdentifierResolveScope & scope,
IdentifierResolveSettings identifier_resolve_settings);
QueryTreeNodePtr tryResolveIdentifierFromTableColumns(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
static bool tryBindIdentifierToTableExpression(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
const IdentifierResolveScope & scope);
static bool tryBindIdentifierToTableExpressions(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
const IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromTableExpression(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr matchArrayJoinSubcolumns(
const QueryTreeNodePtr & array_join_column_inner_expression,
const ColumnNode & array_join_column_expression_typed,
const QueryTreeNodePtr & resolved_expression,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveExpressionFromArrayJoinExpressions(const QueryTreeNodePtr & resolved_expression,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromArrayJoin(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & table_expression_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromJoinTreeNode(const IdentifierLookup & identifier_lookup,
const QueryTreeNodePtr & join_tree_node,
IdentifierResolveScope & scope);
QueryTreeNodePtr tryResolveIdentifierFromJoinTree(const IdentifierLookup & identifier_lookup,
IdentifierResolveScope & scope);
IdentifierResolveResult tryResolveIdentifierInParentScopes(const IdentifierLookup & identifier_lookup, IdentifierResolveScope & scope);
IdentifierResolveResult tryResolveIdentifier(const IdentifierLookup & identifier_lookup,
IdentifierResolveScope & scope,
IdentifierResolveSettings identifier_resolve_settings = {});
QueryTreeNodePtr tryResolveIdentifierFromStorage(
const Identifier & identifier,
const QueryTreeNodePtr & table_expression_node,
const AnalysisTableExpressionData & table_expression_data,
IdentifierResolveScope & scope,
size_t identifier_column_qualifier_parts,
bool can_be_not_found = false);
/// Resolve query tree nodes functions
void qualifyColumnNodesWithProjectionNames(const QueryTreeNodes & column_nodes,
@ -362,6 +259,8 @@ private:
/// Global expression node to projection name map
std::unordered_map<QueryTreeNodePtr, ProjectionName> node_to_projection_name;
IdentifierResolver identifier_resolver; // (ctes_in_resolve_process, node_to_projection_name);
/// Global resolve expression node to projection names map
std::unordered_map<QueryTreeNodePtr, ProjectionNames> resolved_expressions;

View File

@ -0,0 +1,71 @@
#pragma once
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/Utils.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/// Used to replace columns whose type was changed because of JOIN with nodes of their original type
class ReplaceColumnsVisitor : public InDepthQueryTreeVisitor<ReplaceColumnsVisitor>
{
public:
explicit ReplaceColumnsVisitor(const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map_, const ContextPtr & context_)
: replacement_map(replacement_map_)
, context(context_)
{}
/// Apply the replacement transitively, because a column may change its type twice: once to get a supertype and then again because of `join_use_nulls`
static QueryTreeNodePtr findTransitiveReplacement(QueryTreeNodePtr node, const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map_)
{
auto it = replacement_map_.find(node);
QueryTreeNodePtr result_node = nullptr;
for (; it != replacement_map_.end(); it = replacement_map_.find(result_node))
{
if (result_node && result_node->isEqual(*it->second))
{
Strings map_dump;
for (const auto & [k, v]: replacement_map_)
map_dump.push_back(fmt::format("{} -> {} (is_equals: {}, is_same: {})",
k.node->dumpTree(), v->dumpTree(), k.node->isEqual(*v), k.node == v));
throw Exception(ErrorCodes::LOGICAL_ERROR, "Infinite loop in query tree replacement map: {}", fmt::join(map_dump, "; "));
}
chassert(it->second);
result_node = it->second;
}
return result_node;
}
void visitImpl(QueryTreeNodePtr & node)
{
if (auto replacement_node = findTransitiveReplacement(node, replacement_map))
node = replacement_node;
if (auto * function_node = node->as<FunctionNode>(); function_node && function_node->isResolved())
rerunFunctionResolve(function_node, context);
}
/// We want to re-run resolve for function _after_ its arguments are replaced
bool shouldTraverseTopToBottom() const { return false; }
bool needChildVisit(QueryTreeNodePtr & /* parent */, QueryTreeNodePtr & child)
{
/// Visit only expressions, but not subqueries
return child->getNodeType() == QueryTreeNodeType::IDENTIFIER
|| child->getNodeType() == QueryTreeNodeType::LIST
|| child->getNodeType() == QueryTreeNodeType::FUNCTION
|| child->getNodeType() == QueryTreeNodeType::COLUMN;
}
private:
const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map;
const ContextPtr & context;
};
}
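For illustration, a minimal sketch of how this visitor might be driven (the way the map is populated below is an assumption for the example; `column_before`, `column_after` and `query_node` are hypothetical names, and `QueryTreeNodePtrWithHashMap` is assumed to behave like a `std::unordered_map` keyed by query tree node):

    QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> replacement_map;
    /// Collected while resolving the JOIN tree: maps a column node to the node it should be replaced with.
    replacement_map[column_before] = column_after;

    ReplaceColumnsVisitor visitor(replacement_map, context);
    visitor.visit(query_node);  /// Replaces matching columns in place and re-resolves the function nodes above them.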

View File

@ -22,15 +22,6 @@ include (configure_config.cmake)
configure_file (Common/config.h.in ${CONFIG_INCLUDE_PATH}/config.h)
configure_file (Common/config_version.cpp.in ${CONFIG_INCLUDE_PATH}/config_version.cpp)
if (USE_DEBUG_HELPERS)
get_target_property(MAGIC_ENUM_INCLUDE_DIR ch_contrib::magic_enum INTERFACE_INCLUDE_DIRECTORIES)
# CMake generator expression will do insane quoting when it encounters special character like quotes, spaces, etc.
# Prefixing "SHELL:" will force it to use the original text.
set (INCLUDE_DEBUG_HELPERS "SHELL:-I\"${ClickHouse_SOURCE_DIR}/base\" -I\"${MAGIC_ENUM_INCLUDE_DIR}\" -include \"${ClickHouse_SOURCE_DIR}/src/Core/iostream_debug_helpers.h\"")
# Use generator expression as we don't want to pollute CMAKE_CXX_FLAGS, which will interfere with CMake check system.
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:${INCLUDE_DEBUG_HELPERS}>)
endif ()
# ClickHouse developers may use platform-dependent code under some macro (e.g. `#ifdef ENABLE_MULTITARGET`).
# If turned ON, this option defines such macro.
# See `src/Common/TargetSpecific.h`

View File

@ -828,7 +828,7 @@ ColumnPtr ColumnArray::filterTuple(const Filter & filt, ssize_t result_size_hint
size_t tuple_size = tuple.tupleSize();
if (tuple_size == 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty tuple");
return filterGeneric(filt, result_size_hint);
Columns temporary_arrays(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
@ -1265,7 +1265,7 @@ ColumnPtr ColumnArray::replicateTuple(const Offsets & replicate_offsets) const
size_t tuple_size = tuple.tupleSize();
if (tuple_size == 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty tuple");
return replicateGeneric(replicate_offsets);
Columns temporary_arrays(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)

View File

@ -1,5 +1,6 @@
#include <Common/Arena.h>
#include <Common/Exception.h>
#include <Common/HashTable/HashSet.h>
#include <Common/HashTable/Hash.h>
#include <Common/RadixSort.h>
#include <Common/SipHash.h>
@ -264,6 +265,23 @@ void ColumnDecimal<T>::updatePermutation(IColumn::PermutationSortDirection direc
}
}
template <is_decimal T>
size_t ColumnDecimal<T>::estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const
{
const size_t range_size = equal_range.size();
if (range_size <= 1)
return range_size;
/// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
HashSet<T> elements;
for (size_t i = equal_range.from; i < equal_range.to; ++i)
{
size_t permuted_i = permutation[i];
elements.insert(data[permuted_i]);
}
return elements.size();
}
template <is_decimal T>
ColumnPtr ColumnDecimal<T>::permute(const IColumn::Permutation & perm, size_t limit) const
{

View File

@ -97,6 +97,8 @@ public:
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int, IColumn::Permutation & res, EqualRanges& equal_ranges) const override;
size_t estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const override;
MutableColumnPtr cloneResized(size_t size) const override;

View File

@ -5,6 +5,7 @@
#include <IO/WriteHelpers.h>
#include <Common/Arena.h>
#include <Common/HashTable/Hash.h>
#include <Common/HashTable/StringHashSet.h>
#include <Common/SipHash.h>
#include <Common/WeakHash.h>
#include <Common/assert_cast.h>
@ -200,6 +201,24 @@ void ColumnFixedString::updatePermutation(IColumn::PermutationSortDirection dire
updatePermutationImpl(limit, res, equal_ranges, ComparatorDescendingStable(*this), comparator_equal, DefaultSort(), DefaultPartialSort());
}
size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const
{
const size_t range_size = equal_range.size();
if (range_size <= 1)
return range_size;
/// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
StringHashSet elements;
bool inserted = false;
for (size_t i = equal_range.from; i < equal_range.to; ++i)
{
size_t permuted_i = permutation[i];
StringRef value = getDataAt(permuted_i);
elements.emplace(value, inserted);
}
return elements.size();
}
void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
const ColumnFixedString & src_concrete = assert_cast<const ColumnFixedString &>(src);

View File

@ -142,6 +142,8 @@ public:
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, Permutation & res, EqualRanges & equal_ranges) const override;
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;

View File

@ -3,9 +3,12 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/NumberTraits.h>
#include <Common/HashTable/HashSet.h>
#include <Common/HashTable/HashMap.h>
#include <Common/WeakHash.h>
#include <Common/assert_cast.h>
#include "Storages/IndicesDescription.h"
#include "base/types.h"
#include <base/sort.h>
#include <base/scope_guard.h>
@ -486,6 +489,21 @@ void ColumnLowCardinality::updatePermutationWithCollation(const Collator & colla
updatePermutationImpl(limit, res, equal_ranges, comparator, equal_comparator, DefaultSort(), DefaultPartialSort());
}
size_t ColumnLowCardinality::estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const
{
const size_t range_size = equal_range.size();
if (range_size <= 1)
return range_size;
HashSet<UInt64> elements;
for (size_t i = equal_range.from; i < equal_range.to; ++i)
{
UInt64 index = getIndexes().getUInt(permutation[i]);
elements.insert(index);
}
return elements.size();
}
std::vector<MutableColumnPtr> ColumnLowCardinality::scatter(ColumnIndex num_columns, const Selector & selector) const
{
auto columns = getIndexes().scatter(num_columns, selector);

View File

@ -145,6 +145,8 @@ public:
void updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
ColumnPtr replicate(const Offsets & offsets) const override
{
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().replicate(offsets));

View File

@ -1,4 +1,5 @@
#include <Common/Arena.h>
#include <Common/HashTable/StringHashSet.h>
#include <Common/SipHash.h>
#include <Common/assert_cast.h>
#include <Common/WeakHash.h>
@ -621,7 +622,7 @@ void ColumnNullable::updatePermutationImpl(IColumn::PermutationSortDirection dir
if (unlikely(stability == PermutationSortStability::Stable))
{
for (auto & null_range : null_ranges)
::sort(res.begin() + null_range.first, res.begin() + null_range.second);
::sort(std::ranges::next(res.begin(), null_range.from), std::ranges::next(res.begin(), null_range.to));
}
if (is_nulls_last || null_ranges.empty())
@ -660,6 +661,33 @@ void ColumnNullable::updatePermutationWithCollation(const Collator & collator, I
updatePermutationImpl(direction, stability, limit, null_direction_hint, res, equal_ranges, &collator);
}
size_t ColumnNullable::estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const
{
const size_t range_size = equal_range.size();
if (range_size <= 1)
return range_size;
/// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
StringHashSet elements;
bool has_null = false;
bool inserted = false;
for (size_t i = equal_range.from; i < equal_range.to; ++i)
{
size_t permuted_i = permutation[i];
if (isNullAt(permuted_i))
{
has_null = true;
}
else
{
StringRef value = getDataAt(permuted_i);
elements.emplace(value, inserted);
}
}
return elements.size() + (has_null ? 1 : 0);
}
void ColumnNullable::reserve(size_t n)
{
getNestedColumn().reserve(n);

View File

@ -109,6 +109,7 @@ public:
size_t limit, int null_direction_hint, Permutation & res) const override;
void updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int null_direction_hint, Permutation & res, EqualRanges& equal_ranges) const override;
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
void reserve(size_t n) override;
void shrinkToFit() override;
void ensureOwnership() override;

View File

@ -820,6 +820,9 @@ ColumnPtr recursiveRemoveSparse(const ColumnPtr & column)
if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get()))
{
auto columns = column_tuple->getColumns();
if (columns.empty())
return column;
for (auto & element : columns)
element = recursiveRemoveSparse(element);

View File

@ -5,6 +5,7 @@
#include <Columns/ColumnCompressed.h>
#include <Columns/MaskOperations.h>
#include <Common/Arena.h>
#include <Common/HashTable/StringHashSet.h>
#include <Common/HashTable/Hash.h>
#include <Common/WeakHash.h>
#include <Common/assert_cast.h>
@ -481,6 +482,23 @@ void ColumnString::updatePermutationWithCollation(const Collator & collator, Per
DefaultPartialSort());
}
size_t ColumnString::estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const
{
const size_t range_size = equal_range.size();
if (range_size <= 1)
return range_size;
/// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
StringHashSet elements;
bool inserted = false;
for (size_t i = equal_range.from; i < equal_range.to; ++i)
{
size_t permuted_i = permutation[i];
StringRef value = getDataAt(permuted_i);
elements.emplace(value, inserted);
}
return elements.size();
}
ColumnPtr ColumnString::replicate(const Offsets & replicate_offsets) const
{

View File

@ -260,6 +260,8 @@ public:
void updatePermutationWithCollation(const Collator & collator, IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int, Permutation & res, EqualRanges & equal_ranges) const override;
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
ColumnPtr compress() const override;

View File

@ -3,14 +3,16 @@
#include <Columns/ColumnCompressed.h>
#include <Columns/IColumnImpl.h>
#include <Core/Field.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <Common/Arena.h>
#include <Common/WeakHash.h>
#include <Common/assert_cast.h>
#include <Common/iota.h>
#include <Common/typeid_cast.h>
#include <Columns/ColumnsCommon.h>
#include <DataTypes/Serializations/SerializationInfoTuple.h>
#include <IO/Operators.h>
#include <IO/WriteBufferFromString.h>
#include <Processors/Transforms/ColumnGathererTransform.h>
#include <base/sort.h>
@ -23,6 +25,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE;
extern const int LOGICAL_ERROR;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
}
@ -44,6 +47,9 @@ std::string ColumnTuple::getName() const
ColumnTuple::ColumnTuple(MutableColumns && mutable_columns)
{
if (mutable_columns.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "This function cannot be used to construct empty tuple. It is a bug");
columns.reserve(mutable_columns.size());
for (auto & column : mutable_columns)
{
@ -52,15 +58,21 @@ ColumnTuple::ColumnTuple(MutableColumns && mutable_columns)
columns.push_back(std::move(column));
}
column_length = columns[0]->size();
}
ColumnTuple::ColumnTuple(size_t len) : column_length(len) {}
ColumnTuple::Ptr ColumnTuple::create(const Columns & columns)
{
if (columns.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "This function cannot be used to construct empty tuple. It is a bug");
for (const auto & column : columns)
if (isColumnConst(*column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ColumnTuple cannot have ColumnConst as its element");
auto column_tuple = ColumnTuple::create(MutableColumns());
auto column_tuple = ColumnTuple::create(columns[0]->size());
column_tuple->columns.assign(columns.begin(), columns.end());
return column_tuple;
@ -68,11 +80,14 @@ ColumnTuple::Ptr ColumnTuple::create(const Columns & columns)
ColumnTuple::Ptr ColumnTuple::create(const TupleColumns & columns)
{
if (columns.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "This function cannot be used to construct empty tuple. It is a bug");
for (const auto & column : columns)
if (isColumnConst(*column))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "ColumnTuple cannot have ColumnConst as its element");
auto column_tuple = ColumnTuple::create(MutableColumns());
auto column_tuple = ColumnTuple::create(columns[0]->size());
column_tuple->columns = columns;
return column_tuple;
@ -80,6 +95,9 @@ ColumnTuple::Ptr ColumnTuple::create(const TupleColumns & columns)
MutableColumnPtr ColumnTuple::cloneEmpty() const
{
if (columns.empty())
return ColumnTuple::create(0);
const size_t tuple_size = columns.size();
MutableColumns new_columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
@ -90,6 +108,9 @@ MutableColumnPtr ColumnTuple::cloneEmpty() const
MutableColumnPtr ColumnTuple::cloneResized(size_t new_size) const
{
if (columns.empty())
return ColumnTuple::create(new_size);
const size_t tuple_size = columns.size();
MutableColumns new_columns(tuple_size);
for (size_t i = 0; i < tuple_size; ++i)
@ -98,6 +119,16 @@ MutableColumnPtr ColumnTuple::cloneResized(size_t new_size) const
return ColumnTuple::create(std::move(new_columns));
}
size_t ColumnTuple::size() const
{
if (columns.empty())
return column_length;
/// It's difficult to maintain a consistent `column_length` because there
/// are many places that manipulate sub-columns directly.
return columns.at(0)->size();
}
Field ColumnTuple::operator[](size_t n) const
{
Field res;
@ -144,6 +175,7 @@ void ColumnTuple::insert(const Field & x)
if (tuple.size() != tuple_size)
throw Exception(ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE, "Cannot insert value of different size into tuple");
++column_length;
for (size_t i = 0; i < tuple_size; ++i)
columns[i]->insert(tuple[i]);
}
@ -181,6 +213,7 @@ void ColumnTuple::insertFrom(const IColumn & src_, size_t n)
if (src.columns.size() != tuple_size)
throw Exception(ErrorCodes::CANNOT_INSERT_VALUE_OF_DIFFERENT_SIZE_INTO_TUPLE, "Cannot insert value of different size into tuple");
++column_length;
for (size_t i = 0; i < tuple_size; ++i)
columns[i]->insertFrom(*src.columns[i], n);
}
@ -199,18 +232,28 @@ void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t le
void ColumnTuple::insertDefault()
{
++column_length;
for (auto & column : columns)
column->insertDefault();
}
void ColumnTuple::popBack(size_t n)
{
column_length -= n;
for (auto & column : columns)
column->popBack(n);
}
StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
if (columns.empty())
{
/// We have to put one placeholder byte into the Arena, because serialization into zero bytes is ambiguous.
char * res = arena.allocContinue(1, begin);
*res = 0;
return { res, 1 };
}
StringRef res(begin, 0);
for (const auto & column : columns)
{
@ -232,6 +275,11 @@ char * ColumnTuple::serializeValueIntoMemory(size_t n, char * memory) const
const char * ColumnTuple::deserializeAndInsertFromArena(const char * pos)
{
++column_length;
if (columns.empty())
return pos + 1;
for (auto & column : columns)
pos = column->deserializeAndInsertFromArena(pos);
@ -272,6 +320,7 @@ void ColumnTuple::updateHashFast(SipHash & hash) const
void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
column_length += length;
const size_t tuple_size = columns.size();
for (size_t i = 0; i < tuple_size; ++i)
columns[i]->insertRangeFrom(
@ -281,6 +330,12 @@ void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t leng
ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint) const
{
if (columns.empty())
{
size_t bytes = countBytesInFilter(filt);
return cloneResized(bytes);
}
const size_t tuple_size = columns.size();
Columns new_columns(tuple_size);
@ -292,12 +347,29 @@ ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint) con
void ColumnTuple::expand(const Filter & mask, bool inverted)
{
if (columns.empty())
{
size_t bytes = countBytesInFilter(mask);
if (inverted)
bytes = mask.size() - bytes;
column_length = bytes;
return;
}
for (auto & column : columns)
column->expand(mask, inverted);
}
ColumnPtr ColumnTuple::permute(const Permutation & perm, size_t limit) const
{
if (columns.empty())
{
if (column_length != perm.size())
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of permutation doesn't match size of column");
return cloneResized(limit ? std::min(column_length, limit) : column_length);
}
const size_t tuple_size = columns.size();
Columns new_columns(tuple_size);
@ -309,6 +381,14 @@ ColumnPtr ColumnTuple::permute(const Permutation & perm, size_t limit) const
ColumnPtr ColumnTuple::index(const IColumn & indexes, size_t limit) const
{
if (columns.empty())
{
if (indexes.size() < limit)
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of indexes is less than required");
return cloneResized(limit ? limit : column_length);
}
const size_t tuple_size = columns.size();
Columns new_columns(tuple_size);
@ -320,6 +400,14 @@ ColumnPtr ColumnTuple::index(const IColumn & indexes, size_t limit) const
ColumnPtr ColumnTuple::replicate(const Offsets & offsets) const
{
if (columns.empty())
{
if (column_length != offsets.size())
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of offsets doesn't match size of column");
return cloneResized(offsets.back());
}
const size_t tuple_size = columns.size();
Columns new_columns(tuple_size);
@ -331,6 +419,22 @@ ColumnPtr ColumnTuple::replicate(const Offsets & offsets) const
MutableColumns ColumnTuple::scatter(ColumnIndex num_columns, const Selector & selector) const
{
if (columns.empty())
{
if (column_length != selector.size())
throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Size of selector doesn't match size of column");
std::vector<size_t> counts(num_columns);
for (auto idx : selector)
++counts[idx];
MutableColumns res(num_columns);
for (size_t i = 0; i < num_columns; ++i)
res[i] = cloneResized(counts[i]);
return res;
}
const size_t tuple_size = columns.size();
std::vector<MutableColumns> scattered_tuple_elements(tuple_size);
@ -413,6 +517,9 @@ void ColumnTuple::getPermutationImpl(IColumn::PermutationSortDirection direction
res.resize(rows);
iota(res.data(), rows, IColumn::Permutation::value_type(0));
if (columns.empty())
return;
if (limit >= rows)
limit = 0;
@ -429,7 +536,7 @@ void ColumnTuple::updatePermutationImpl(IColumn::PermutationSortDirection direct
for (const auto & column : columns)
{
while (!equal_ranges.empty() && limit && limit <= equal_ranges.back().first)
while (!equal_ranges.empty() && limit && limit <= equal_ranges.back().from)
equal_ranges.pop_back();
if (collator && column->isCollationSupported())
@ -603,6 +710,9 @@ void ColumnTuple::takeDynamicStructureFromSourceColumns(const Columns & source_c
ColumnPtr ColumnTuple::compress() const
{
if (columns.empty())
return Ptr();
size_t byte_size = 0;
Columns compressed;
compressed.reserve(columns.size());

View File

@ -26,6 +26,13 @@ private:
explicit ColumnTuple(MutableColumns && columns);
ColumnTuple(const ColumnTuple &) = default;
/// An empty tuple needs a dedicated field to store its size.
/// This field is used *only* for zero-element tuples.
/// Otherwise `columns[0]->size()` should be used to get the size of the tuple column.
size_t column_length;
/// Dedicated constructor for empty tuples.
explicit ColumnTuple(size_t len);
public:
/** Create immutable column using immutable arguments. This arguments may be shared with other columns.
* Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
@ -39,6 +46,8 @@ public:
requires std::is_rvalue_reference_v<Arg &&>
static MutablePtr create(Arg && arg) { return Base::create(std::forward<Arg>(arg)); }
static MutablePtr create(size_t len_) { return Base::create(len_); }
std::string getName() const override;
const char * getFamilyName() const override { return "Tuple"; }
TypeIndex getDataType() const override { return TypeIndex::Tuple; }
@ -46,10 +55,7 @@ public:
MutableColumnPtr cloneEmpty() const override;
MutableColumnPtr cloneResized(size_t size) const override;
size_t size() const override
{
return columns.at(0)->size();
}
size_t size() const override;
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
@ -117,6 +123,9 @@ public:
bool hasDynamicStructure() const override;
void takeDynamicStructureFromSourceColumns(const Columns & source_columns) override;
/// Empty tuple needs a public method to manage its size.
void addSize(size_t delta) { column_length += delta; }
private:
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;
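A short sketch of how the zero-element tuple bookkeeping above behaves (an illustrative fragment derived from the methods in this header, not code from this commit):

    auto col = ColumnTuple::create(/*len_=*/3);  /// tuple with no elements but 3 rows; size() falls back to column_length
    col->insertDefault();                        /// only increments column_length, there are no sub-columns to touch
    col->addSize(2);                             /// for callers that grow the column externally
    /// col->size() == 6 at this point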

View File

@ -14,6 +14,7 @@
#include <Common/Arena.h>
#include <Common/Exception.h>
#include <Common/HashTable/Hash.h>
#include <Common/HashTable/StringHashSet.h>
#include <Common/NaNUtils.h>
#include <Common/RadixSort.h>
#include <Common/SipHash.h>
@ -413,6 +414,25 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
}
}
template<typename T>
size_t ColumnVector<T>::estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const
{
const size_t range_size = equal_range.size();
if (range_size <= 1)
return range_size;
/// TODO use sampling if the range is too large (e.g. 16k elements, but configurable)
StringHashSet elements;
bool inserted = false;
for (size_t i = equal_range.from; i < equal_range.to; ++i)
{
size_t permuted_i = permutation[i];
StringRef value = getDataAt(permuted_i);
elements.emplace(value, inserted);
}
return elements.size();
}
template <typename T>
MutableColumnPtr ColumnVector<T>::cloneResized(size_t size) const
{

View File

@ -161,6 +161,8 @@ public:
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res, EqualRanges& equal_ranges) const override;
size_t estimateCardinalityInPermutedRange(const IColumn::Permutation & permutation, const EqualRange & equal_range) const override;
void reserve(size_t n) override
{
data.reserve_exact(n);

View File

@ -83,6 +83,11 @@ ColumnPtr IColumn::createWithOffsets(const Offsets & offsets, const ColumnConst
return res;
}
size_t IColumn::estimateCardinalityInPermutedRange(const IColumn::Permutation & /*permutation*/, const EqualRange & equal_range) const
{
return equal_range.size();
}
void IColumn::forEachSubcolumn(ColumnCallback callback) const
{
const_cast<IColumn*>(this)->forEachSubcolumn([&callback](WrappedPtr & subcolumn)

View File

@ -36,11 +36,19 @@ class Field;
class WeakHash32;
class ColumnConst;
/*
* Represents a set of equal ranges in previous column to perform sorting in current column.
* Used in sorting by tuples.
* */
using EqualRanges = std::vector<std::pair<size_t, size_t> >;
/// A range of column values between row indexes `from` and `to`. The name "equal range" comes from table sorting, its main use case: with
/// a PRIMARY KEY (c_pk1, c_pk2, ...), the first PK column is fully sorted. The second PK column is sorted within equal-value runs of the
/// first PK column, and so on. The number of runs (ranges) per column increases from one primary key column to the next. An "equal range"
/// is a run in a previous column within which the values of the current column can be sorted.
struct EqualRange
{
size_t from; /// inclusive
size_t to; /// exclusive
EqualRange(size_t from_, size_t to_) : from(from_), to(to_) { chassert(from <= to); }
size_t size() const { return to - from; }
};
using EqualRanges = std::vector<EqualRange>;
/// Declares interface to store columns in memory.
class IColumn : public COW<IColumn>
@ -399,6 +407,9 @@ public:
"or for Array or Tuple, containing them.");
}
/// Estimate the cardinality (number of unique values) of the values in 'equal_range' after permutation, formally: |{ column[permutation[r]] : r in equal_range }|.
virtual size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const;
/** Copies each element according to the offsets parameter.
* (The i-th element should be copied offsets[i] - offsets[i - 1] times.)
* It is necessary for the ARRAY JOIN operation.
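As a worked example of the new interface: suppose a column holds the values {10, 20, 10, 30, 20} and sorting by the previous key column produced the permutation {3, 0, 2, 1, 4}. For the equal range [from=1, to=4) the permuted rows are permutation[1..3] = {0, 2, 1}, i.e. the values {10, 10, 20}, so the estimated cardinality is 2. A self-contained stand-in over std::vector (not the IColumn interface itself) that computes the same formula:

    #include <cassert>
    #include <cstddef>
    #include <unordered_set>
    #include <vector>

    /// |{ column[permutation[r]] : r in [from, to) }|
    static size_t estimateCardinality(const std::vector<int> & column,
                                      const std::vector<size_t> & permutation,
                                      size_t from, size_t to)
    {
        std::unordered_set<int> elements;
        for (size_t i = from; i < to; ++i)
            elements.insert(column[permutation[i]]);
        return elements.size();
    }

    int main()
    {
        std::vector<int> column = {10, 20, 10, 30, 20};
        std::vector<size_t> permutation = {3, 0, 2, 1, 4};
        assert(estimateCardinality(column, permutation, 1, 4) == 2);  /// values {10, 10, 20}
    }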

View File

@ -60,12 +60,9 @@ ColumnPtr IColumnDummy::filter(const Filter & filt, ssize_t /*result_size_hint*/
return cloneDummy(bytes);
}
void IColumnDummy::expand(const IColumn::Filter & mask, bool inverted)
void IColumnDummy::expand(const IColumn::Filter & mask, bool)
{
size_t bytes = countBytesInFilter(mask);
if (inverted)
bytes = mask.size() - bytes;
s = bytes;
s = mask.size();
}
ColumnPtr IColumnDummy::permute(const Permutation & perm, size_t limit) const

View File

@ -139,7 +139,7 @@ void IColumn::updatePermutationImpl(
if (equal_ranges.empty())
return;
if (limit >= size() || limit > equal_ranges.back().second)
if (limit >= size() || limit > equal_ranges.back().to)
limit = 0;
EqualRanges new_ranges;

View File

@ -77,7 +77,7 @@ INSTANTIATE(IPv6)
#undef INSTANTIATE
template <bool inverted, bool column_is_short, typename Container>
template <bool inverted, typename Container>
static size_t extractMaskNumericImpl(
PaddedPODArray<UInt8> & mask,
const Container & data,
@ -85,42 +85,27 @@ static size_t extractMaskNumericImpl(
const PaddedPODArray<UInt8> * null_bytemap,
PaddedPODArray<UInt8> * nulls)
{
if constexpr (!column_is_short)
{
if (data.size() != mask.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask");
}
if (data.size() != mask.size())
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask");
size_t ones_count = 0;
size_t data_index = 0;
size_t mask_size = mask.size();
size_t data_size = data.size();
for (size_t i = 0; i != mask_size && data_index != data_size; ++i)
for (size_t i = 0; i != mask_size; ++i)
{
// Change mask only where value is 1.
if (!mask[i])
continue;
UInt8 value;
size_t index;
if constexpr (column_is_short)
{
index = data_index;
++data_index;
}
else
index = i;
if (null_bytemap && (*null_bytemap)[index])
if (null_bytemap && (*null_bytemap)[i])
{
value = null_value;
if (nulls)
(*nulls)[i] = 1;
}
else
value = static_cast<bool>(data[index]);
value = static_cast<bool>(data[i]);
if constexpr (inverted)
value = !value;
@ -131,12 +116,6 @@ static size_t extractMaskNumericImpl(
mask[i] = value;
}
if constexpr (column_is_short)
{
if (data_index != data_size)
throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask");
}
return ones_count;
}
@ -155,10 +134,7 @@ static bool extractMaskNumeric(
const auto & data = numeric_column->getData();
size_t ones_count;
if (column->size() < mask.size())
ones_count = extractMaskNumericImpl<inverted, true>(mask, data, null_value, null_bytemap, nulls);
else
ones_count = extractMaskNumericImpl<inverted, false>(mask, data, null_value, null_bytemap, nulls);
ones_count = extractMaskNumericImpl<inverted>(mask, data, null_value, null_bytemap, nulls);
mask_info.has_ones = ones_count > 0;
mask_info.has_zeros = ones_count != mask.size();
@ -279,25 +255,32 @@ void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> &
if (!column_function)
return;
size_t original_size = column.column->size();
ColumnWithTypeAndName result;
/// If mask contains only zeros, we can just create
/// an empty column with the execution result type.
if (!mask_info.has_ones)
{
/// If the mask contains only zeros, we can just create a column with default values, as it will be ignored
auto result_type = column_function->getResultType();
auto empty_column = result_type->createColumn();
result = {std::move(empty_column), result_type, ""};
auto default_column = result_type->createColumnConstWithDefaultValue(original_size)->convertToFullColumnIfConst();
column = {default_column, result_type, ""};
}
/// Filter column only if mask contains zeros.
else if (mask_info.has_zeros)
{
/// If it contains both zeros and ones, we need to execute the function only on the rows selected by the mask.
/// First we filter the column (which creates a new, shorter column), then we execute the function on it (reduce), and finally we expand the result
/// back to the original size. Expanding keeps function calls consistent (all columns have the same size), and it is safe
/// because the padded values won't be used by `if`.
auto filtered = column_function->filter(mask, -1);
result = typeid_cast<const ColumnFunction *>(filtered.get())->reduce();
auto filter_after_execution = typeid_cast<const ColumnFunction *>(filtered.get())->reduce();
auto mut_column = IColumn::mutate(std::move(filter_after_execution.column));
mut_column->expand(mask, false);
column.column = std::move(mut_column);
}
else
result = column_function->reduce();
column = column_function->reduce();
column = std::move(result);
chassert(column.column->size() == original_size);
}
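The filter/execute/expand flow described above can be illustrated with a self-contained stand-in over std::vector (a conceptual sketch of the control flow only, not the ColumnFunction API):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    /// Run a function only on rows where mask == 1, then pad the result back to the
    /// original size so that all columns stay aligned; the padded values are never
    /// read by the surrounding `if`.
    static std::vector<int64_t> maskedSquare(const std::vector<int64_t> & input,
                                             const std::vector<uint8_t> & mask)
    {
        std::vector<int64_t> filtered;                 /// "filter": keep masked rows only
        for (size_t i = 0; i < input.size(); ++i)
            if (mask[i])
                filtered.push_back(input[i]);

        for (auto & value : filtered)                  /// "reduce": execute the function
            value = value * value;

        std::vector<int64_t> result(input.size(), 0);  /// "expand": restore the original size
        size_t filtered_index = 0;
        for (size_t i = 0; i < input.size(); ++i)
            if (mask[i])
                result[i] = filtered[filtered_index++];
        return result;
    }

    int main()
    {
        assert((maskedSquare({2, 3, 4}, {1, 0, 1}) == std::vector<int64_t>{4, 0, 16}));
    }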
void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty)

View File

@ -43,12 +43,13 @@ namespace
endpoint,
proxy_scheme,
proxy_port,
cache_ttl
std::chrono::seconds {cache_ttl}
};
return std::make_shared<RemoteProxyConfigurationResolver>(
server_configuration,
request_protocol,
std::make_shared<RemoteProxyHostFetcherImpl>(),
isTunnelingDisabledForHTTPSRequestsOverHTTPProxy(configuration));
}

View File

@ -6,22 +6,47 @@
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Common/logger_useful.h>
#include <Common/DNSResolver.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int RECEIVED_ERROR_FROM_REMOTE_IO_SERVER;
}
std::string RemoteProxyHostFetcherImpl::fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts)
{
auto request = Poco::Net::HTTPRequest(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1);
auto session = makeHTTPSession(HTTPConnectionGroupType::HTTP, endpoint, timeouts);
session->sendRequest(request);
Poco::Net::HTTPResponse response;
auto & response_body_stream = session->receiveResponse(response);
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
throw HTTPException(
ErrorCodes::RECEIVED_ERROR_FROM_REMOTE_IO_SERVER,
endpoint.toString(),
response.getStatus(),
response.getReason(),
"");
std::string proxy_host;
Poco::StreamCopier::copyToString(response_body_stream, proxy_host);
return proxy_host;
}
RemoteProxyConfigurationResolver::RemoteProxyConfigurationResolver(
const RemoteServerConfiguration & remote_server_configuration_,
Protocol request_protocol_,
std::shared_ptr<RemoteProxyHostFetcher> fetcher_,
bool disable_tunneling_for_https_requests_over_http_proxy_
)
: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_), remote_server_configuration(remote_server_configuration_)
: ProxyConfigurationResolver(request_protocol_, disable_tunneling_for_https_requests_over_http_proxy_),
remote_server_configuration(remote_server_configuration_), fetcher(fetcher_)
{
}
@ -29,9 +54,7 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve()
{
auto logger = getLogger("RemoteProxyConfigurationResolver");
auto & [endpoint, proxy_protocol, proxy_port, cache_ttl_] = remote_server_configuration;
LOG_DEBUG(logger, "Obtain proxy using resolver: {}", endpoint.toString());
auto & [endpoint, proxy_protocol_string, proxy_port, cache_ttl] = remote_server_configuration;
std::lock_guard lock(cache_mutex);
@ -55,66 +78,26 @@ ProxyConfiguration RemoteProxyConfigurationResolver::resolve()
.withSendTimeout(1)
.withReceiveTimeout(1);
try
{
/// It should be just empty GET request.
Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, endpoint.getPath(), Poco::Net::HTTPRequest::HTTP_1_1);
const auto proxy_host = fetcher->fetch(endpoint, timeouts);
const auto & host = endpoint.getHost();
auto resolved_hosts = DNSResolver::instance().resolveHostAll(host);
LOG_DEBUG(logger, "Use proxy: {}://{}:{}", proxy_protocol_string, proxy_host, proxy_port);
HTTPSessionPtr session;
auto proxy_protocol = ProxyConfiguration::protocolFromString(proxy_protocol_string);
for (size_t i = 0; i < resolved_hosts.size(); ++i)
{
auto resolved_endpoint = endpoint;
resolved_endpoint.setHost(resolved_hosts[i].toString());
session = makeHTTPSession(HTTPConnectionGroupType::HTTP, resolved_endpoint, timeouts);
bool use_tunneling_for_https_requests_over_http_proxy = useTunneling(
request_protocol,
proxy_protocol,
disable_tunneling_for_https_requests_over_http_proxy);
try
{
session->sendRequest(request);
break;
}
catch (...)
{
if (i + 1 == resolved_hosts.size())
throw;
}
}
cached_config.protocol = proxy_protocol;
cached_config.host = proxy_host;
cached_config.port = proxy_port;
cached_config.tunneling = use_tunneling_for_https_requests_over_http_proxy;
cached_config.original_request_protocol = request_protocol;
cache_timestamp = std::chrono::system_clock::now();
cache_valid = true;
Poco::Net::HTTPResponse response;
auto & response_body_stream = session->receiveResponse(response);
if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Proxy resolver returned not OK status: {}", response.getReason());
String proxy_host;
/// Read proxy host as string from response body.
Poco::StreamCopier::copyToString(response_body_stream, proxy_host);
LOG_DEBUG(logger, "Use proxy: {}://{}:{}", proxy_protocol, proxy_host, proxy_port);
bool use_tunneling_for_https_requests_over_http_proxy = useTunneling(
request_protocol,
cached_config.protocol,
disable_tunneling_for_https_requests_over_http_proxy);
cached_config.protocol = ProxyConfiguration::protocolFromString(proxy_protocol);
cached_config.host = proxy_host;
cached_config.port = proxy_port;
cached_config.tunneling = use_tunneling_for_https_requests_over_http_proxy;
cached_config.original_request_protocol = request_protocol;
cache_timestamp = std::chrono::system_clock::now();
cache_valid = true;
return cached_config;
}
catch (...)
{
tryLogCurrentException("RemoteProxyConfigurationResolver", "Failed to obtain proxy");
return {};
}
return cached_config;
}
void RemoteProxyConfigurationResolver::errorReport(const ProxyConfiguration & config)
@ -124,7 +107,7 @@ void RemoteProxyConfigurationResolver::errorReport(const ProxyConfiguration & co
std::lock_guard lock(cache_mutex);
if (!cache_ttl.count() || !cache_valid)
if (!remote_server_configuration.cache_ttl_.count() || !cache_valid)
return;
if (std::tie(cached_config.protocol, cached_config.host, cached_config.port)

View File

@ -10,6 +10,19 @@
namespace DB
{
struct ConnectionTimeouts;
struct RemoteProxyHostFetcher
{
virtual ~RemoteProxyHostFetcher() = default;
virtual std::string fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) = 0;
};
struct RemoteProxyHostFetcherImpl : public RemoteProxyHostFetcher
{
std::string fetch(const Poco::URI & endpoint, const ConnectionTimeouts & timeouts) override;
};
/*
* Makes an HTTP GET request to the specified endpoint to obtain a proxy host.
* */
@ -22,13 +35,14 @@ public:
Poco::URI endpoint;
String proxy_protocol;
unsigned proxy_port;
unsigned cache_ttl_;
const std::chrono::seconds cache_ttl_;
};
RemoteProxyConfigurationResolver(
const RemoteServerConfiguration & remote_server_configuration_,
Protocol request_protocol_,
bool disable_tunneling_for_https_requests_over_http_proxy_ = true);
std::shared_ptr<RemoteProxyHostFetcher> fetcher_,
bool disable_tunneling_for_https_requests_over_http_proxy_ = false);
ProxyConfiguration resolve() override;
@ -36,11 +50,11 @@ public:
private:
RemoteServerConfiguration remote_server_configuration;
std::shared_ptr<RemoteProxyHostFetcher> fetcher;
std::mutex cache_mutex;
bool cache_valid = false;
std::chrono::time_point<std::chrono::system_clock> cache_timestamp;
const std::chrono::seconds cache_ttl{0};
ProxyConfiguration cached_config;
};

View File

@ -280,6 +280,10 @@ public:
if (!initialized())
abort();
/// Thread cannot join itself.
if (state->thread_id == std::this_thread::get_id())
abort();
state->event.wait();
state.reset();
}
@ -293,12 +297,7 @@ public:
bool joinable() const
{
if (!state)
return false;
/// Thread cannot join itself.
if (state->thread_id == std::this_thread::get_id())
return false;
return true;
return initialized();
}
std::thread::id get_id() const

View File

@ -637,6 +637,9 @@ void TestKeeper::finalize(const String &)
expired = true;
}
/// Signal requests_queue to wake up the processing thread without waiting for the timeout
requests_queue.finish();
processing_thread.join();
try

View File

@ -1,5 +1,4 @@
#include "ZooKeeper.h"
#include "Coordination/KeeperConstants.h"
#include "Coordination/KeeperFeatureFlags.h"
#include "ZooKeeperImpl.h"
#include "KeeperException.h"
@ -376,11 +375,14 @@ void ZooKeeper::createAncestors(const std::string & path)
}
Coordination::Responses responses;
Coordination::Error code = multiImpl(create_ops, responses, /*check_session_valid*/ false);
const auto & [code, failure_reason] = multiImpl(create_ops, responses, /*check_session_valid*/ false);
if (code == Coordination::Error::ZOK)
return;
if (!failure_reason.empty())
throw KeeperException::fromMessage(code, failure_reason);
throw KeeperException::fromPath(code, path);
}
@ -676,17 +678,19 @@ Coordination::Error ZooKeeper::trySet(const std::string & path, const std::strin
}
Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid)
std::pair<Coordination::Error, std::string>
ZooKeeper::multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid)
{
if (requests.empty())
return Coordination::Error::ZOK;
return {Coordination::Error::ZOK, ""};
std::future<Coordination::MultiResponse> future_result;
Coordination::Requests requests_with_check_session;
if (check_session_valid)
{
Coordination::Requests new_requests = requests;
addCheckSessionOp(new_requests);
future_result = asyncTryMultiNoThrow(new_requests);
requests_with_check_session = requests;
addCheckSessionOp(requests_with_check_session);
future_result = asyncTryMultiNoThrow(requests_with_check_session);
}
else
{
@ -696,7 +700,7 @@ Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests
if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready)
{
impl->finalize(fmt::format("Operation timeout on {} {}", Coordination::OpNum::Multi, requests[0]->getPath()));
return Coordination::Error::ZOPERATIONTIMEOUT;
return {Coordination::Error::ZOPERATIONTIMEOUT, ""};
}
else
{
@ -704,11 +708,14 @@ Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests
Coordination::Error code = response.error;
responses = response.responses;
std::string reason;
if (check_session_valid)
{
if (code != Coordination::Error::ZOK && !Coordination::isHardwareError(code) && getFailedOpIndex(code, responses) == requests.size())
{
impl->finalize(fmt::format("Session was killed: {}", requests.back()->getPath()));
reason = fmt::format("Session was killed: {}", requests_with_check_session.back()->getPath());
impl->finalize(reason);
code = Coordination::Error::ZSESSIONMOVED;
}
responses.pop_back();
@ -717,23 +724,33 @@ Coordination::Error ZooKeeper::multiImpl(const Coordination::Requests & requests
chassert(code == Coordination::Error::ZOK || Coordination::isHardwareError(code) || responses.back()->error != Coordination::Error::ZOK);
}
return code;
return {code, std::move(reason)};
}
}
Coordination::Responses ZooKeeper::multi(const Coordination::Requests & requests, bool check_session_valid)
{
Coordination::Responses responses;
Coordination::Error code = multiImpl(requests, responses, check_session_valid);
const auto & [code, failure_reason] = multiImpl(requests, responses, check_session_valid);
if (!failure_reason.empty())
throw KeeperException::fromMessage(code, failure_reason);
KeeperMultiException::check(code, requests, responses);
return responses;
}
Coordination::Error ZooKeeper::tryMulti(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid)
{
Coordination::Error code = multiImpl(requests, responses, check_session_valid);
const auto & [code, failure_reason] = multiImpl(requests, responses, check_session_valid);
if (code != Coordination::Error::ZOK && !Coordination::isUserError(code))
{
if (!failure_reason.empty())
throw KeeperException::fromMessage(code, failure_reason);
throw KeeperException(code);
}
return code;
}
@ -1346,7 +1363,7 @@ Coordination::Error ZooKeeper::tryMultiNoThrow(const Coordination::Requests & re
{
try
{
return multiImpl(requests, responses, check_session_valid);
return multiImpl(requests, responses, check_session_valid).first;
}
catch (const Coordination::Exception & e)
{

View File

@ -2,10 +2,8 @@
#include "Types.h"
#include <Poco/Util/LayeredConfiguration.h>
#include <unordered_set>
#include <future>
#include <memory>
#include <mutex>
#include <string>
#include <Common/logger_useful.h>
#include <Common/ProfileEvents.h>
@ -18,7 +16,6 @@
#include <Common/thread_local_rng.h>
#include <Coordination/KeeperFeatureFlags.h>
#include <unistd.h>
#include <random>
namespace ProfileEvents
@ -644,7 +641,11 @@ private:
Coordination::Stat * stat,
Coordination::WatchCallbackPtr watch_callback,
Coordination::ListRequestType list_request_type);
Coordination::Error multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid);
/// Returns the error code together with an optional failure reason
std::pair<Coordination::Error, std::string>
multiImpl(const Coordination::Requests & requests, Coordination::Responses & responses, bool check_session_valid);
Coordination::Error existsImpl(const std::string & path, Coordination::Stat * stat_, Coordination::WatchCallback watch_callback);
Coordination::Error syncImpl(const std::string & path, std::string & returned_path);

View File

@ -0,0 +1,172 @@
#include <gtest/gtest.h>
#include <Common/RemoteProxyConfigurationResolver.h>
#include <Poco/URI.h>
#include <IO/ConnectionTimeouts.h>
#include <base/sleep.h>
namespace
{
struct RemoteProxyHostFetcherMock : public DB::RemoteProxyHostFetcher
{
explicit RemoteProxyHostFetcherMock(const std::string & return_mock_) : return_mock(return_mock_) {}
std::string fetch(const Poco::URI &, const DB::ConnectionTimeouts &) override
{
fetch_count++;
return return_mock;
}
std::string return_mock;
std::size_t fetch_count {0};
};
}
namespace DB
{
TEST(RemoteProxyConfigurationResolver, HTTPOverHTTP)
{
const char * proxy_server_mock = "proxy1";
auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration
{
Poco::URI("not_important"),
"http",
80,
std::chrono::seconds {10}
};
RemoteProxyConfigurationResolver resolver(
remote_server_configuration,
ProxyConfiguration::Protocol::HTTP,
std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock)
);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, proxy_server_mock);
ASSERT_EQ(configuration.port, 80);
ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.original_request_protocol, ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.tunneling, false);
}
TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPS)
{
const char * proxy_server_mock = "proxy1";
auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration
{
Poco::URI("not_important"),
"https",
443,
std::chrono::seconds {10}
};
RemoteProxyConfigurationResolver resolver(
remote_server_configuration,
ProxyConfiguration::Protocol::HTTPS,
std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock)
);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, proxy_server_mock);
ASSERT_EQ(configuration.port, 443);
ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTPS);
ASSERT_EQ(configuration.original_request_protocol, ProxyConfiguration::Protocol::HTTPS);
// tunneling should not be used, https over https.
ASSERT_EQ(configuration.tunneling, false);
}
TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTP)
{
const char * proxy_server_mock = "proxy1";
auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration
{
Poco::URI("not_important"),
"http",
80,
std::chrono::seconds {10}
};
RemoteProxyConfigurationResolver resolver(
remote_server_configuration,
ProxyConfiguration::Protocol::HTTPS,
std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock)
);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, proxy_server_mock);
ASSERT_EQ(configuration.port, 80);
ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.original_request_protocol, ProxyConfiguration::Protocol::HTTPS);
// tunneling should be used, https over http.
ASSERT_EQ(configuration.tunneling, true);
}
TEST(RemoteProxyConfigurationResolver, HTTPSOverHTTPNoTunneling)
{
const char * proxy_server_mock = "proxy1";
auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration
{
Poco::URI("not_important"),
"http",
80,
std::chrono::seconds {10}
};
RemoteProxyConfigurationResolver resolver(
remote_server_configuration,
ProxyConfiguration::Protocol::HTTPS,
std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock),
true /* disable_tunneling_for_https_requests_over_http_proxy_ */
);
auto configuration = resolver.resolve();
ASSERT_EQ(configuration.host, proxy_server_mock);
ASSERT_EQ(configuration.port, 80);
ASSERT_EQ(configuration.protocol, ProxyConfiguration::Protocol::HTTP);
ASSERT_EQ(configuration.original_request_protocol, ProxyConfiguration::Protocol::HTTPS);
// tunneling would normally be used for https over http, but it is explicitly disabled here.
ASSERT_EQ(configuration.tunneling, false);
}
TEST(RemoteProxyConfigurationResolver, SimpleCacheTest)
{
const char * proxy_server_mock = "proxy1";
auto cache_ttl = 5u;
auto remote_server_configuration = RemoteProxyConfigurationResolver::RemoteServerConfiguration
{
Poco::URI("not_important"),
"http",
80,
std::chrono::seconds {cache_ttl}
};
auto fetcher_mock = std::make_shared<RemoteProxyHostFetcherMock>(proxy_server_mock);
RemoteProxyConfigurationResolver resolver(
remote_server_configuration,
ProxyConfiguration::Protocol::HTTP,
fetcher_mock
);
resolver.resolve();
resolver.resolve();
resolver.resolve();
ASSERT_EQ(fetcher_mock->fetch_count, 1u);
sleepForSeconds(cache_ttl * 2);
resolver.resolve();
ASSERT_EQ(fetcher_mock->fetch_count, 2);
}
}

View File

@ -129,7 +129,6 @@ class IColumn;
M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \
M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. The value can be overwritten per table with table settings", 0) \
M(Bool, s3queue_allow_experimental_sharded_mode, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten", 0) \
M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \
M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in hdfs engine tables", 0) \
M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \
@ -243,7 +242,8 @@ class IColumn;
M(Bool, do_not_merge_across_partitions_select_final, false, "Merge parts only in one partition in select final", 0) \
M(Bool, split_parts_ranges_into_intersecting_and_non_intersecting_final, true, "Split parts ranges into intersecting and non intersecting during FINAL optimization", 0) \
M(Bool, split_intersecting_parts_ranges_into_layers_final, true, "Split intersecting parts ranges into layers during FINAL optimization", 0) \
M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \
M(Bool, allow_experimental_inverted_index, false, "If it is set to true, allow to use experimental inverted index.", 0) \
M(Bool, allow_experimental_full_text_index, false, "If it is set to true, allow to use experimental full-text index.", 0) \
\
M(UInt64, mysql_max_rows_to_insert, 65536, "The maximum number of rows in MySQL batch insertion of the MySQL storage engine", 0) \
M(Bool, mysql_map_string_to_text_in_show_columns, true, "If enabled, String type will be mapped to TEXT in SHOW [FULL] COLUMNS, BLOB otherwise. Has an effect only when the connection is made through the MySQL wire protocol.", 0) \
@ -260,6 +260,8 @@ class IColumn;
M(Bool, force_primary_key, false, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
M(Bool, use_skip_indexes, true, "Use data skipping indexes during query execution.", 0) \
M(Bool, use_skip_indexes_if_final, false, "If query has FINAL, then skipping data based on indexes may produce incorrect result, hence disabled by default.", 0) \
M(Bool, materialize_skip_indexes_on_insert, true, "If true, skip indexes are calculated on inserts; otherwise, skip indexes will be calculated only during merges", 0) \
M(Bool, materialize_statistics_on_insert, true, "If true, statistics are calculated on inserts; otherwise, statistics will be calculated only during merges", 0) \
M(String, ignore_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be excluded during query execution.", 0) \
\
M(String, force_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be used during query execution, otherwise an exception will be thrown.", 0) \
@ -961,6 +963,7 @@ class IColumn;
MAKE_OBSOLETE(M, UInt64, partial_merge_join_optimizations, 0) \
MAKE_OBSOLETE(M, MaxThreads, max_alter_threads, 0) \
MAKE_OBSOLETE(M, Bool, use_mysql_types_in_show_columns, false) \
MAKE_OBSOLETE(M, Bool, s3queue_allow_experimental_sharded_mode, false) \
/* moved to config.xml: see also src/Core/ServerSettings.h */ \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_buffer_flush_schedule_pool_size, 16) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_pool_size, 16) \

View File

@ -85,10 +85,13 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"24.6", {{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."},
{"24.6", {{"materialize_skip_indexes_on_insert", true, true, "Added new setting to allow to disable materialization of skip indexes on insert"},
{"materialize_statistics_on_insert", true, true, "Added new setting to allow to disable materialization of statistics on insert"},
{"input_format_parquet_use_native_reader", false, false, "When reading Parquet files, to use native reader instead of arrow reader."},
{"hdfs_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in HDFS engine instead of empty query result"},
{"azure_throw_on_zero_files_match", false, false, "Allow to throw an error when ListObjects request cannot match any files in AzureBlobStorage engine instead of empty query result"},
{"s3_validate_request_settings", true, true, "Allow to disable S3 request settings validation"},
{"allow_experimental_full_text_index", false, false, "Enable experimental full-text index"},
{"azure_skip_empty_files", false, false, "Allow to skip empty files in azure table engine"},
{"hdfs_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in HDFS table engine"},
{"azure_ignore_file_doesnt_exist", false, false, "Allow to return 0 rows when the requested files don't exist instead of throwing an exception in AzureBlobStorage table engine"},

View File

@ -1,149 +0,0 @@
#include "iostream_debug_helpers.h"
#include <iostream>
#include <Client/Connection.h>
#include <Core/Block.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Core/Field.h>
#include <Core/NamesAndTypes.h>
#include <DataTypes/IDataType.h>
#include <Functions/IFunction.h>
#include <IO/WriteBufferFromOStream.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/ExpressionActions.h>
#include <Parsers/IAST.h>
#include <Storages/IStorage.h>
#include <Common/COW.h>
#include <Common/FieldVisitorDump.h>
namespace DB
{
template <>
std::ostream & operator<< <Field>(std::ostream & stream, const Field & what)
{
stream << applyVisitor(FieldVisitorDump(), what);
return stream;
}
std::ostream & operator<<(std::ostream & stream, const NameAndTypePair & what)
{
stream << "NameAndTypePair(name = " << what.name << ", type = " << what.type << ")";
return stream;
}
std::ostream & operator<<(std::ostream & stream, const IDataType & what)
{
stream << "IDataType(name = " << what.getName() << ", default = " << what.getDefault() << ")";
return stream;
}
std::ostream & operator<<(std::ostream & stream, const IStorage & what)
{
auto table_id = what.getStorageID();
stream << "IStorage(name = " << what.getName() << ", tableName = " << table_id.table_name << ") {"
<< what.getInMemoryMetadataPtr()->getColumns().getAllPhysical().toString() << "}";
return stream;
}
std::ostream & operator<<(std::ostream & stream, const TableLockHolder &)
{
stream << "TableStructureReadLock()";
return stream;
}
std::ostream & operator<<(std::ostream & stream, const IFunctionOverloadResolver & what)
{
stream << "IFunction(name = " << what.getName() << ", variadic = " << what.isVariadic() << ", args = " << what.getNumberOfArguments()
<< ")";
return stream;
}
std::ostream & operator<<(std::ostream & stream, const Block & what)
{
stream << "Block("
<< "num_columns = " << what.columns() << "){" << what.dumpStructure() << "}";
return stream;
}
std::ostream & operator<<(std::ostream & stream, const ColumnWithTypeAndName & what)
{
stream << "ColumnWithTypeAndName(name = " << what.name << ", type = " << *what.type << ", column = ";
return dumpValue(stream, what.column) << ")";
}
std::ostream & operator<<(std::ostream & stream, const IColumn & what)
{
stream << "IColumn(" << what.dumpStructure() << ")";
stream << "{";
for (size_t i = 0; i < what.size(); ++i)
{
if (i)
stream << ", ";
stream << applyVisitor(FieldVisitorDump(), what[i]);
}
stream << "}";
return stream;
}
std::ostream & operator<<(std::ostream & stream, const Packet & what)
{
stream << "Packet("
<< "type = " << what.type;
// types description: Core/Protocol.h
if (what.exception)
stream << "exception = " << what.exception.get();
// TODO: profile_info
stream << ") {" << what.block << "}";
return stream;
}
std::ostream & operator<<(std::ostream & stream, const ExpressionActions & what)
{
stream << "ExpressionActions(" << what.dumpActions() << ")";
return stream;
}
std::ostream & operator<<(std::ostream & stream, const TreeRewriterResult & what)
{
stream << "SyntaxAnalyzerResult{";
stream << "storage=" << what.storage << "; ";
if (!what.source_columns.empty())
{
stream << "source_columns=";
dumpValue(stream, what.source_columns);
stream << "; ";
}
if (!what.aliases.empty())
{
stream << "aliases=";
dumpValue(stream, what.aliases);
stream << "; ";
}
if (!what.array_join_result_to_source.empty())
{
stream << "array_join_result_to_source=";
dumpValue(stream, what.array_join_result_to_source);
stream << "; ";
}
if (!what.array_join_alias_to_name.empty())
{
stream << "array_join_alias_to_name=";
dumpValue(stream, what.array_join_alias_to_name);
stream << "; ";
}
if (!what.array_join_name_to_alias.empty())
{
stream << "array_join_name_to_alias=";
dumpValue(stream, what.array_join_name_to_alias);
stream << "; ";
}
stream << "rewrite_subqueries=" << what.rewrite_subqueries << "; ";
stream << "}";
return stream;
}
}

View File

@ -1,49 +0,0 @@
#pragma once
#include <iostream>
namespace DB
{
// Use template to disable implicit casting for certain overloaded types such as Field, which leads
// to overload resolution ambiguity.
class Field;
template <typename T>
requires std::is_same_v<T, Field>
std::ostream & operator<<(std::ostream & stream, const T & what);
struct NameAndTypePair;
std::ostream & operator<<(std::ostream & stream, const NameAndTypePair & what);
class IDataType;
std::ostream & operator<<(std::ostream & stream, const IDataType & what);
class IStorage;
std::ostream & operator<<(std::ostream & stream, const IStorage & what);
class IFunctionOverloadResolver;
std::ostream & operator<<(std::ostream & stream, const IFunctionOverloadResolver & what);
class IFunctionBase;
std::ostream & operator<<(std::ostream & stream, const IFunctionBase & what);
class Block;
std::ostream & operator<<(std::ostream & stream, const Block & what);
struct ColumnWithTypeAndName;
std::ostream & operator<<(std::ostream & stream, const ColumnWithTypeAndName & what);
class IColumn;
std::ostream & operator<<(std::ostream & stream, const IColumn & what);
struct Packet;
std::ostream & operator<<(std::ostream & stream, const Packet & what);
class ExpressionActions;
std::ostream & operator<<(std::ostream & stream, const ExpressionActions & what);
struct TreeRewriterResult;
std::ostream & operator<<(std::ostream & stream, const TreeRewriterResult & what);
}
/// some operator<< should be declared before operator<<(... std::shared_ptr<>)
#include <base/iostream_debug_helpers.h>
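For context on the deleted header: the comment at the top of the file describes a constrained-template trick, restricting operator<< to T == Field so that types merely convertible to Field cannot select this overload and cause ambiguity. A self-contained sketch of the same idea (standalone toy code, not ClickHouse's):

#include <iostream>
#include <type_traits>

struct Field {};    // stand-in for the real class

template <typename T>
requires std::is_same_v<T, Field>
std::ostream & operator<<(std::ostream & stream, const T &)
{
    return stream << "Field(...)";
}

int main()
{
    std::cout << Field{} << '\n';   // resolves to the constrained overload only
}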

View File

@ -742,6 +742,7 @@ std::string BaseDaemon::getDefaultConfigFileName() const
void BaseDaemon::closeFDs()
{
#if !defined(USE_XRAY)
/// NOTE: may benefit from close_range() (linux 5.9+)
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
fs::path proc_path{"/dev/fd"};
@ -789,13 +790,13 @@ void BaseDaemon::closeFDs()
}
}
}
#endif
}
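The NOTE added near the top of closeFDs() points at close_range() (Linux 5.9+) as a cheaper alternative to walking the descriptor table. A possible, untested shape of that optimisation, keeping the existing per-descriptor loop as the fallback; the guard macro and error handling are assumptions:

#if defined(OS_LINUX)
    // Close every descriptor above stderr in a single syscall where the kernel
    // and libc support it; on failure, fall back to the /proc/self/fd walk.
    if (close_range(3, ~0U, 0) != 0)
    {
        /// ... existing loop over /proc/self/fd ...
    }
#endif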
void BaseDaemon::initialize(Application & self)
{
closeFDs();
ServerApplication::initialize(self);
/// now highest priority (lowest value) is PRIO_APPLICATION = -100, we want higher!

View File

@ -75,6 +75,9 @@ ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column)
else if (const auto * column_tuple = typeid_cast<const ColumnTuple *>(column.get()))
{
auto columns = column_tuple->getColumns();
if (columns.empty())
return column;
for (auto & element : columns)
element = recursiveRemoveLowCardinality(element);
res = ColumnTuple::create(columns);

View File

@ -29,7 +29,6 @@ namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int DUPLICATE_COLUMN;
extern const int EMPTY_DATA_PASSED;
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int SIZES_OF_COLUMNS_IN_TUPLE_DOESNT_MATCH;
@ -181,6 +180,9 @@ static void addElementSafe(const DataTypes & elems, IColumn & column, F && impl)
MutableColumnPtr DataTypeTuple::createColumn() const
{
if (elems.empty())
return ColumnTuple::create(0);
size_t size = elems.size();
MutableColumns tuple_columns(size);
for (size_t i = 0; i < size; ++i)
@ -206,6 +208,9 @@ MutableColumnPtr DataTypeTuple::createColumn(const ISerialization & serializatio
if (!serialization_tuple)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected serialization to create column of type Tuple");
if (elems.empty())
return IDataType::createColumn(serialization);
const auto & element_serializations = serialization_tuple->getElementsSerializations();
size_t size = elems.size();
@ -224,6 +229,12 @@ Field DataTypeTuple::getDefault() const
void DataTypeTuple::insertDefaultInto(IColumn & column) const
{
if (elems.empty())
{
column.insertDefault();
return;
}
addElementSafe(elems, column, [&]
{
for (const auto & i : collections::range(0, elems.size()))
@ -388,7 +399,7 @@ void DataTypeTuple::forEachChild(const ChildCallback & callback) const
static DataTypePtr create(const ASTPtr & arguments)
{
if (!arguments || arguments->children.empty())
throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Tuple cannot be empty");
return std::make_shared<DataTypeTuple>(DataTypes{});
DataTypes nested_types;
nested_types.reserve(arguments->children.size());
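Taken together, these changes make an element-less Tuple a first-class type instead of an error. Very roughly, using only the calls visible in this diff (the surrounding usage is illustrative):

// An empty tuple type can now be constructed and used like any other type.
auto empty_tuple_type = std::make_shared<DataTypeTuple>(DataTypes{});
auto column = empty_tuple_type->createColumn();   // ColumnTuple with zero element columns
column->insertDefault();                          // row count is still tracked
// column->size() is now 1 even though the tuple has no elements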

View File

@ -20,7 +20,6 @@ namespace DB
namespace ErrorCodes
{
extern const int EMPTY_DATA_PASSED;
extern const int NOT_IMPLEMENTED;
}
@ -146,9 +145,6 @@ DataTypePtr FieldToDataType<on_error>::operator() (const Array & x) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const Tuple & tuple) const
{
if (tuple.empty())
throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot infer type of an empty tuple");
DataTypes element_types;
element_types.reserve(tuple.size());
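With the EMPTY_DATA_PASSED check removed, type inference for an empty Tuple field now falls through to the normal path and yields an element-less DataTypeTuple instead of throwing. Sketched usage; the exact printed type name is an assumption:

Tuple empty_tuple;
DataTypePtr inferred = applyVisitor(FieldToDataType(), Field(empty_tuple));
// inferred is an element-less DataTypeTuple, presumably rendered as "Tuple()"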

View File

@ -229,9 +229,10 @@ static std::pair<ColumnPtr, DataTypePtr> recursivlyConvertDynamicColumnToTuple(
= recursivlyConvertDynamicColumnToTuple(tuple_columns[i], tuple_types[i]);
}
auto new_column = tuple_size == 0 ? column : ColumnPtr(ColumnTuple::create(new_tuple_columns));
return
{
ColumnTuple::create(new_tuple_columns),
new_column,
recreateTupleWithElements(*type_tuple, new_tuple_types)
};
}

View File

@ -70,13 +70,15 @@ void SerializationInfoTuple::add(const SerializationInfo & other)
void SerializationInfoTuple::addDefaults(size_t length)
{
SerializationInfo::addDefaults(length);
for (const auto & elem : elems)
elem->addDefaults(length);
}
void SerializationInfoTuple::replaceData(const SerializationInfo & other)
{
SerializationInfo::add(other);
SerializationInfo::replaceData(other);
const auto & other_info = assert_cast<const SerializationInfoTuple &>(other);
for (const auto & [name, elem] : name_to_elem)
@ -94,7 +96,9 @@ MutableSerializationInfoPtr SerializationInfoTuple::clone() const
for (const auto & elem : elems)
elems_cloned.push_back(elem->clone());
return std::make_shared<SerializationInfoTuple>(std::move(elems_cloned), names, settings);
auto ret = std::make_shared<SerializationInfoTuple>(std::move(elems_cloned), names, settings);
ret->data = data;
return ret;
}
MutableSerializationInfoPtr SerializationInfoTuple::createWithType(

View File

@ -91,6 +91,10 @@ static ReturnType addElementSafe(size_t num_elems, IColumn & column, F && impl)
restore_elements();
return ReturnType(false);
}
else
{
assert_cast<ColumnTuple &>(column).addSize(1);
}
// Check that all columns now have the same size.
size_t new_size = column.size();
@ -564,6 +568,12 @@ void SerializationTuple::enumerateStreams(
const StreamCallback & callback,
const SubstreamData & data) const
{
if (elems.empty())
{
ISerialization::enumerateStreams(settings, callback, data);
return;
}
const auto * type_tuple = data.type ? &assert_cast<const DataTypeTuple &>(*data.type) : nullptr;
const auto * column_tuple = data.column ? &assert_cast<const ColumnTuple &>(*data.column) : nullptr;
const auto * info_tuple = data.serialization_info ? &assert_cast<const SerializationInfoTuple &>(*data.serialization_info) : nullptr;
@ -626,6 +636,22 @@ void SerializationTuple::serializeBinaryBulkWithMultipleStreams(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
if (elems.empty())
{
if (WriteBuffer * stream = settings.getter(settings.path))
{
size_t size = column.size();
if (limit == 0 || offset + limit > size)
limit = size - offset;
for (size_t i = 0; i < limit; ++i)
stream->write('0');
}
return;
}
auto * tuple_state = checkAndGetState<SerializeBinaryBulkStateTuple>(state);
for (size_t i = 0; i < elems.size(); ++i)
@ -642,6 +668,24 @@ void SerializationTuple::deserializeBinaryBulkWithMultipleStreams(
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const
{
if (elems.empty())
{
auto cached_column = getFromSubstreamsCache(cache, settings.path);
if (cached_column)
{
column = cached_column;
}
else if (ReadBuffer * stream = settings.getter(settings.path))
{
auto mutable_column = column->assumeMutable();
typeid_cast<ColumnTuple &>(*mutable_column).addSize(stream->tryIgnore(limit));
column = std::move(mutable_column);
addToSubstreamsCache(cache, settings.path, column);
}
return;
}
auto * tuple_state = checkAndGetState<DeserializeBinaryBulkStateTuple>(state);
auto mutable_column = column->assumeMutable();
@ -650,6 +694,8 @@ void SerializationTuple::deserializeBinaryBulkWithMultipleStreams(
settings.avg_value_size_hint = 0;
for (size_t i = 0; i < elems.size(); ++i)
elems[i]->deserializeBinaryBulkWithMultipleStreams(column_tuple.getColumnPtr(i), limit, settings, tuple_state->states[i], cache);
typeid_cast<ColumnTuple &>(*mutable_column).addSize(column_tuple.getColumn(0).size());
}
size_t SerializationTuple::getPositionByName(const String & name) const
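Because an element-less tuple has no per-element substreams, the bulk (de)serialization in this file keeps the row count alive by writing one filler byte per row and, on read, counting how many of those bytes it can skip. The same idea as a standalone, hypothetical sketch (not the real stream plumbing; the function names are made up):

// Write side: one placeholder byte per row of an empty tuple.
void writeEmptyTupleRows(WriteBuffer & out, size_t rows)
{
    for (size_t i = 0; i < rows; ++i)
        out.write('0');
}

// Read side: the number of rows recovered equals the number of bytes actually skipped.
size_t readEmptyTupleRows(ReadBuffer & in, size_t limit)
{
    return in.tryIgnore(limit);
}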

View File

@ -1,4 +1,3 @@
#include <Columns/IColumn.h>
#include <Core/Field.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/IDataType.h>
@ -10,8 +9,6 @@
#include <string>
#include <vector>
#include <Core/iostream_debug_helpers.h>
template <typename T>
inline std::ostream& operator<<(std::ostream & ostr, const std::vector<T> & v)
@ -63,7 +60,7 @@ TEST_P(ParseDataTypeTest, parseStringValue)
data_type->getDefaultSerialization()->deserializeWholeText(*col, buffer, FormatSettings{});
}
ASSERT_EQ(p.expected_values.size(), col->size()) << "Actual items: " << *col;
ASSERT_EQ(p.expected_values.size(), col->size());
for (size_t i = 0; i < col->size(); ++i)
{
ASSERT_EQ(p.expected_values[i], (*col)[i]);

Some files were not shown because too many files have changed in this diff.